In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional encoder that turns an image batch into flat feature vectors.

    The network is six ``Conv2d`` layers (kernel 2, stride 2, no padding), each
    followed by ``ELU``, and a final ``Flatten``.  Channel widths go
    3 -> 32 -> 32 -> 64 -> 128 -> 256 -> 512, so a 64x64 input is reduced to a
    1x1 map and every sample yields 512 features.
    """

    def __init__(self):
        super().__init__()

        channel_widths = (3, 32, 32, 64, 128, 256, 512)
        stack = []
        for n_in, n_out in zip(channel_widths[:-1], channel_widths[1:]):
            stack.append(nn.Conv2d(n_in, n_out, kernel_size=2, stride=2, padding=0))
            stack.append(nn.ELU())
        stack.append(nn.Flatten())

        # The attribute name and the layer order define the state-dict keys
        # ("cnn.0.weight", "cnn.2.weight", ...), so both are kept as they were.
        self.cnn = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the convolutional stack to ``x`` and return the flattened result."""
        return self.cnn(x)

In [3]:
from torch.nn.functional import one_hot

class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model combining image features with a target-grid encoding.

    * ``obs['pov']`` is encoded by a ``VisualEncoder`` whose weights are loaded
      from disk; it is evaluated under ``torch.no_grad()``.
    * ``obs['target_grid']`` is one-hot encoded into 7 classes, reduced to a
      single channel by a 1x1x1 ``Conv3d`` + ``ELU`` and flattened.
    * The two feature vectors are concatenated and passed through an MLP trunk
      that feeds an action-logit head and a scalar value head.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the encoders, the MLP trunk and both output heads.

        Args:
            obs_space: observation space, forwarded to ``TorchModelV2``.
            action_space: action space; ``action_space.n`` sizes the action head.
            num_outputs: forwarded to ``TorchModelV2`` (the head itself is sized
                from ``action_space.n``).
            model_config: model config dict, forwarded to ``TorchModelV2``.
            name: model name, forwarded to ``TorchModelV2``.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        visual_features_dim = 512
        # One channel left after the target encoder, flattened over a 9x11x11 grid.
        target_features_dim = 9 * 11 * 11
        self.visual_encoder = VisualEncoder()
        # Weights are mapped to CPU on load; the module is moved to the GPU below
        # when one is available.
        self.visual_encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.target_encoder = nn.Sequential(
            nn.Conv3d(7, 1, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )
        policy_hidden_dim = 256
        self.policy_network = nn.Sequential(
            nn.Linear(visual_features_dim + target_features_dim, 1024),
            nn.ELU(),
            nn.Linear(1024, 512),
            nn.ELU(),
            nn.Linear(512, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU(),
        )
        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        # Value estimate of the most recent forward() call; read by value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            self.visual_encoder.cuda()
            self.target_encoder.cuda()
            self.policy_network.cuda()
            self.action_head.cuda()
            self.value_head.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: dict whose ``'obs'`` entry holds the ``'pov'`` and
                ``'target_grid'`` tensors (batch first).
            state: passed through unchanged.
            seq_lens: unused.

        Returns:
            ``(action_logits, state)``.
        """
        obs = input_dict['obs']
        # Tensor.cuda()/Tensor.to() return a new tensor rather than moving the
        # receiver in place, so the result must be kept.  Using the device of
        # the model's own parameters also covers a model that was moved after
        # construction.
        device = next(self.parameters()).device
        # Channels-last image -> channels-first, scaled to [0, 1].
        pov = obs['pov'].to(device).permute(0, 3, 1, 2).float() / 255.0
        # Class ids -> one-hot, with the class axis moved to the channel position.
        target_ids = obs['target_grid'].to(device).long()
        target = one_hot(target_ids, num_classes=7).permute(0, 4, 1, 2, 3).float()

        # No gradients are tracked through the pretrained visual encoder.
        with torch.no_grad():
            visual_features = self.visual_encoder(pov)

        target_features = self.target_encoder(target)
        target_features = target_features.reshape(target_features.shape[0], -1)
        features = torch.cat([visual_features, target_features], dim=1)
        features = self.policy_network(features)
        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimate cached by the last ``forward()`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register the model class under the name referenced by the trainer config's
# "custom_model" entry.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
class VisualObservationWrapper(ObsWrapper):
    """Expose 'pov', 'inventory' and 'compass' (and optionally 'target_grid').

    The declared observation space is a ``gym.spaces.Dict``; ``observation()``
    returns a dict with exactly the keys that were declared.
    """

    def __init__(self, env, include_target=False):
        """Wrap ``env`` and declare the observation space.

        Args:
            env: environment to wrap.
            include_target: when true, a 9x11x11 'target_grid' entry is added to
                the observation space and to every returned observation.
        """
        super().__init__(env)
        # Remembered so observation() emits the same keys the space declares.
        self.include_target = include_target
        spaces = {
            'pov': gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)),
            'inventory': gym.spaces.Box(low=0.0, high=20.0, shape=(6,)),
            'compass': gym.spaces.Box(low=-180.0, high=180.0, shape=(1,))
        }
        if include_target:
            spaces['target_grid'] = gym.spaces.Box(low=0, high=6, shape=(9, 11, 11))
        self.observation_space = gym.spaces.Dict(spaces)

    def observation(self, obs, reward=None, done=None, info=None):
        """Convert a raw observation into the dict described by the space.

        If ``info`` carries a 'target_grid' entry it is removed from ``info``
        and used as the target.  If ``info`` is given without that entry, the
        situation is logged, ``should_reset(True)`` is called on the unwrapped
        env when it has that method, and the target is read from the current
        task.  With ``info`` omitted, the target is read from the current task.

        Args:
            obs: raw observation with 'pov', 'inventory' and 'compass' entries.
            reward: unused.
            done: unused.
            info: optional step info dict; mutated as described above.

        Returns:
            dict with 'pov' (float32), 'inventory', 'compass' (1-element array)
            and, when ``include_target`` was set, 'target_grid'.
        """
        if info is not None and 'target_grid' in info:
            target_grid = info.pop('target_grid')
        else:
            if info is not None:
                logger.error(f'info: {info}')
                if hasattr(self.unwrapped, 'should_reset'):
                    self.unwrapped.should_reset(True)
            target_grid = self.env.unwrapped.tasks.current.target_grid

        result = {
            'pov': obs['pov'].astype(np.float32),
            'inventory': obs['inventory'],
            'compass': np.array([obs['compass']['angle'].item()]),
        }
        # Only emit the key that the observation space actually declares.
        if self.include_target:
            result['target_grid'] = target_grid
        return result

In [6]:
import os
# Select which GPU this process may use: order devices by PCI bus ID and make
# only device "0" visible.
# NOTE(review): torch is imported in an earlier cell; presumably CUDA has not
# been initialised yet at this point, so these settings still apply — confirm.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

class RewardWrapper(gym.RewardWrapper):
    """Reward shaping: a reward equal to zero is replaced by -0.1."""

    def __init__(self, env):
        super().__init__(env)

    def reward(self, rew):
        """Return -0.1 when ``rew`` equals 0, otherwise ``rew`` unchanged."""
        return -0.1 if rew == 0 else rew
    
def env_creator(env_config):
    """Build the wrapped 'IGLUSilentBuilder-v0' environment.

    The base env is created with ``max_steps=1000`` and its task set is
    restricted to the presets C3, C17 and C32.  It is then wrapped, innermost
    first, by ``VisualObservationWrapper`` (with the target grid included),
    ``SelectAndPlace``, ``Discretization`` over the 'human-level' flat action
    space, and ``RewardWrapper``.

    Args:
        env_config: environment config; not used here.

    Returns:
        The fully wrapped environment.
    """
    base_env = gym.make('IGLUSilentBuilder-v0', max_steps=1000)
    base_env.update_taskset(TaskSet(preset=['C3', 'C17', 'C32']))

    observed = VisualObservationWrapper(base_env, include_target=True)
    placing = SelectAndPlace(observed)
    discrete = Discretization(placing, flat_action_space('human-level'))
    return RewardWrapper(discrete)

from ray.tune.registry import register_env
# Register env_creator under the name used as the "env" value in the trainer config.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [7]:
from ray.tune.integration.wandb import WandbLogger

# Launch PPO training through Ray Tune on the registered "my_env" environment,
# using the registered custom Torch model and logging through WandbLogger.
# NOTE(review): no `stop` argument is passed, so the trial presumably keeps
# running until it is interrupted — confirm that this is intended.
analysis = tune.run(PPOTrainer, 
         config={
             "env": "my_env", 
             "framework": "torch",
             # Resources requested for the trainer and its rollout workers.
             "num_gpus": 1,
             "num_workers": 1,
             # PPO hyper-parameters.
             "sgd_minibatch_size": 256,
             "clip_param": 0.2,
             "entropy_coeff": 0.01,
             "lambda": 0.95,
             "train_batch_size": 1000,
             #"gamma": 0.99,
             "model": {
                    # Specify our custom model from above.
                    "custom_model": "my_torch_model",
                    # Extra kwargs to be passed to your model's c'tor.
                    "custom_model_config": {},
              },
             # Project and run name handed to the wandb logger.
             "logger_config": {
                  "wandb": {
                      "project": "IGLU-Minecraft",
                      "name": "PPO MultiTask (C3, C17, C32) pretrained (AngelaCNN) (3 noops after placement) r: -0.1"
                  }
              }

        },
        loggers=[WandbLogger],
        # Write a checkpoint when the trial ends.
        checkpoint_at_end=True)



Trial name,status,loc
PPO_my_env_d4877_00000,PENDING,


2021-10-28 21:05:50,841	INFO wandb.py:170 -- Already logged into W&B.
2021-10-28 21:05:50,851	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.6 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=154)[0m 2021-10-28 21:05:54,378	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=154)[0m 2021-10-28 21:05:54,378	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-28_21-07-08
  done: false
  episode_len_mean: 416.0
  episode_media: {}
  episode_reward_max: -40.1000000000003
  episode_reward_mean: -41.60000000000032
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.88278742896186
          entropy_coeff: 0.009999999999999998
          kl: 0.007874982552508255
          policy_loss: 0.06394314674867524
          total_loss: 0.23382639065384864
          vf_explained_var: 0.011645793914794922
          vf_loss: 0.19713611917363272
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1,68.3301,1000,-41.6,-40.1,-43.1,416


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-28_21-07-42
  done: false
  episode_len_mean: 399.0
  episode_media: {}
  episode_reward_max: -36.900000000000254
  episode_reward_mean: -40.5000000000003
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 5
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.86048350599077
          entropy_coeff: 0.009999999999999998
          kl: 0.010376032421635392
          policy_loss: 0.025860504930218062
          total_loss: 0.6812869512372547
          vf_explained_var: 0.04848691076040268
          vf_loss: 0.6819560726897584
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,2,101.994,2000,-40.5,-36.9,-43.1,399


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-28_21-08-06
  done: false
  episode_len_mean: 398.0
  episode_media: {}
  episode_reward_max: -36.900000000000254
  episode_reward_mean: -40.52857142857173
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 2
  episodes_total: 7
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.82120860947503
          entropy_coeff: 0.009999999999999998
          kl: 0.014211903997309497
          policy_loss: 0.052562744501564236
          total_loss: 0.4049812375671334
          vf_explained_var: 0.16837573051452637
          vf_loss: 0.3777881989048587
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,3,125.849,3000,-40.5286,-36.9,-43.1,398


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-28_21-08-28
  done: false
  episode_len_mean: 395.7
  episode_media: {}
  episode_reward_max: -36.900000000000254
  episode_reward_mean: -40.08000000000029
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 10
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.832862366570367
          entropy_coeff: 0.009999999999999998
          kl: 0.011986584484474013
          policy_loss: 0.04375076492627462
          total_loss: 0.48322617146703933
          vf_explained_var: 0.1435023844242096
          vf_loss: 0.46540671288967134
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,4,147.519,4000,-40.08,-36.9,-43.1,395.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-28_21-08-51
  done: false
  episode_len_mean: 396.0
  episode_media: {}
  episode_reward_max: -36.900000000000254
  episode_reward_mean: -40.02500000000029
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 2
  episodes_total: 12
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8134528345531886
          entropy_coeff: 0.009999999999999998
          kl: 0.013853511938048237
          policy_loss: -0.0696818729241689
          total_loss: 0.5041400071647432
          vf_explained_var: 0.3070794343948364
          vf_loss: 0.5991857040052613
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,5,170.734,5000,-40.025,-36.9,-43.1,396


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-28_21-09-15
  done: false
  episode_len_mean: 389.8666666666667
  episode_media: {}
  episode_reward_max: -35.500000000000234
  episode_reward_mean: -39.326666666666945
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 15
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7811918126212225
          entropy_coeff: 0.009999999999999998
          kl: 0.012420995884268038
          policy_loss: 0.03319838427835041
          total_loss: 0.6223251246743732
          vf_explained_var: -0.01896975003182888
          vf_loss: 0.6144544571845068
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,6,195.394,6000,-39.3267,-35.5,-43.1,389.867


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-28_21-09-40
  done: false
  episode_len_mean: 386.77777777777777
  episode_media: {}
  episode_reward_max: -35.500000000000234
  episode_reward_mean: -38.9611111111114
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 18
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7355202198028565
          entropy_coeff: 0.009999999999999998
          kl: 0.012152569671984952
          policy_loss: 0.05059103551838133
          total_loss: 0.7552937305635876
          vf_explained_var: -0.1375350058078766
          vf_loss: 0.7296273785125879
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,7,220.186,7000,-38.9611,-35.5,-43.1,386.778


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-28_21-10-07
  done: false
  episode_len_mean: 383.15
  episode_media: {}
  episode_reward_max: -33.70000000000021
  episode_reward_mean: -38.57000000000028
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 2
  episodes_total: 20
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7037376986609565
          entropy_coeff: 0.009999999999999998
          kl: 0.011121443443602767
          policy_loss: -0.059153061442905004
          total_loss: 0.7160981969700919
          vf_explained_var: 0.32490310072898865
          vf_loss: 0.800064338867863
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,8,246.913,8000,-38.57,-33.7,-43.1,383.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-28_21-10-31
  done: false
  episode_len_mean: 380.5652173913044
  episode_media: {}
  episode_reward_max: -33.70000000000021
  episode_reward_mean: -38.278260869565486
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 23
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.650817953215705
          entropy_coeff: 0.009999999999999998
          kl: 0.01132176628334004
          policy_loss: -0.10350540296898948
          total_loss: 1.0195556660493215
          vf_explained_var: 0.3146646022796631
          vf_loss: 1.1473048915465673
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,9,270.444,9000,-38.2783,-33.7,-43.1,380.565


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-28_21-10-55
  done: false
  episode_len_mean: 379.65384615384613
  episode_media: {}
  episode_reward_max: -33.70000000000021
  episode_reward_mean: -38.16153846153873
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 26
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.654410566223992
          entropy_coeff: 0.009999999999999998
          kl: 0.013525934685654276
          policy_loss: -0.02370535499519772
          total_loss: 1.1604473190175162
          vf_explained_var: 0.24057815968990326
          vf_loss: 1.2079915738768048
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 10000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,10,294.519,10000,-38.1615,-33.7,-43.1,379.654


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-28_21-11-19
  done: false
  episode_len_mean: 377.37931034482756
  episode_media: {}
  episode_reward_max: -33.70000000000021
  episode_reward_mean: -37.91379310344854
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 29
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6484999020894366
          entropy_coeff: 0.009999999999999998
          kl: 0.011142968485377061
          policy_loss: 0.05581546856297387
          total_loss: 0.9807923071914249
          vf_explained_var: -0.1688230037689209
          vf_loss: 0.9492332398063607
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,11,319.295,11000,-37.9138,-33.7,-43.1,377.379




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-28_21-12-08
  done: false
  episode_len_mean: 372.9375
  episode_media: {}
  episode_reward_max: -29.100000000000144
  episode_reward_mean: -37.45312500000026
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 32
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.659366610315111
          entropy_coeff: 0.009999999999999998
          kl: 0.01330094810278439
          policy_loss: -0.02456741217109892
          total_loss: 0.7791001078155305
          vf_explained_var: -0.05371970683336258
          vf_loss: 0.8276009923881955
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,12,368.196,12000,-37.4531,-29.1,-43.1,372.938


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-28_21-12-42
  done: false
  episode_len_mean: 369.9142857142857
  episode_media: {}
  episode_reward_max: -29.100000000000144
  episode_reward_mean: -37.13714285714312
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 35
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6311811447143554
          entropy_coeff: 0.009999999999999998
          kl: 0.011282869165829289
          policy_loss: 0.007885480423768361
          total_loss: 0.5257033940818575
          vf_explained_var: 0.7731040716171265
          vf_loss: 0.5418731550375621
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,13,402.244,13000,-37.1371,-29.1,-43.1,369.914


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-28_21-13-31
  done: false
  episode_len_mean: 367.7368421052632
  episode_media: {}
  episode_reward_max: -29.100000000000144
  episode_reward_mean: -36.907894736842366
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 38
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.689875759018792
          entropy_coeff: 0.009999999999999998
          kl: 0.00956556272224917
          policy_loss: -0.008623478789296415
          total_loss: 0.44847956591596205
          vf_explained_var: 0.818863570690155
          vf_loss: 0.4820886934796969
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,14,450.884,14000,-36.9079,-29.1,-43.1,367.737


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-28_21-14-13
  done: false
  episode_len_mean: 364.5365853658537
  episode_media: {}
  episode_reward_max: -29.100000000000144
  episode_reward_mean: -36.578048780488054
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 41
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.678062473403083
          entropy_coeff: 0.009999999999999998
          kl: 0.010212292188498336
          policy_loss: 0.05624028891324997
          total_loss: 0.5060238169299232
          vf_explained_var: 0.7725417017936707
          vf_loss: 0.4745216932561662
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,15,492.759,15000,-36.578,-29.1,-43.1,364.537


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-28_21-14-42
  done: false
  episode_len_mean: 360.90909090909093
  episode_media: {}
  episode_reward_max: -29.100000000000144
  episode_reward_mean: -36.206818181818434
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 44
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6417322264777288
          entropy_coeff: 0.009999999999999998
          kl: 0.010672259718076319
          policy_loss: 0.04120845074454944
          total_loss: 0.41682066321372985
          vf_explained_var: 0.819794237613678
          vf_loss: 0.39989508299363985
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,16,522.007,16000,-36.2068,-29.1,-43.1,360.909


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-28_21-15-08
  done: false
  episode_len_mean: 359.51063829787233
  episode_media: {}
  episode_reward_max: -29.100000000000144
  episode_reward_mean: -36.05957446808536
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 47
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6452527470058866
          entropy_coeff: 0.009999999999999998
          kl: 0.010559942501691328
          policy_loss: 0.08067785849173864
          total_loss: 0.5288372894128164
          vf_explained_var: 0.7461925745010376
          vf_loss: 0.47249996807012296
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 17000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,17,547.257,17000,-36.0596,-29.1,-43.1,359.511


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-28_21-15-31
  done: false
  episode_len_mean: 358.08
  episode_media: {}
  episode_reward_max: -29.100000000000144
  episode_reward_mean: -35.91000000000024
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 50
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6626096301608615
          entropy_coeff: 0.009999999999999998
          kl: 0.01233807987377473
          policy_loss: 0.06898101468880971
          total_loss: 0.611147889494896
          vf_explained_var: 0.6446516513824463
          vf_loss: 0.5663253563766678
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 18000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,18,570.922,18000,-35.91,-29.1,-43.1,358.08


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-28_21-15-53
  done: false
  episode_len_mean: 356.77358490566036
  episode_media: {}
  episode_reward_max: -29.100000000000144
  episode_reward_mean: -35.77358490566062
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 53
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6278851562076144
          entropy_coeff: 0.009999999999999998
          kl: 0.010464068234003636
          policy_loss: 0.04716617912054062
          total_loss: 0.7121145519945357
          vf_explained_var: 0.4920633137226105
          vf_loss: 0.6891344039597445
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,19,592.853,19000,-35.7736,-29.1,-43.1,356.774


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-28_21-16-19
  done: false
  episode_len_mean: 355.6607142857143
  episode_media: {}
  episode_reward_max: -29.100000000000144
  episode_reward_mean: -35.65714285714309
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 56
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5563974380493164
          entropy_coeff: 0.009999999999999998
          kl: 0.012953751255651832
          policy_loss: 0.03743774460421668
          total_loss: 0.75457557340463
          vf_explained_var: 0.4752698540687561
          vf_loss: 0.7401110653248099
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,20,618.962,20000,-35.6571,-29.1,-43.1,355.661


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-28_21-16-44
  done: false
  episode_len_mean: 355.03389830508473
  episode_media: {}
  episode_reward_max: -29.100000000000144
  episode_reward_mean: -35.58983050847482
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 59
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.576778464847141
          entropy_coeff: 0.009999999999999998
          kl: 0.014376827011155393
          policy_loss: 0.07742634879218208
          total_loss: 0.5326058894395829
          vf_explained_var: 0.5053369998931885
          vf_loss: 0.47807195362531474
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,21,643.388,21000,-35.5898,-29.1,-43.1,355.034




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-28_21-17-24
  done: false
  episode_len_mean: 353.9516129032258
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -35.47741935483895
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 62
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5785304890738594
          entropy_coeff: 0.009999999999999998
          kl: 0.009947506588352834
          policy_loss: 0.028890516691737703
          total_loss: 0.8994660384125179
          vf_explained_var: 0.3066878914833069
          vf_loss: 0.8943713279234038
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,22,683.415,22000,-35.4774,-28.7,-43.1,353.952


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-28_21-17-47
  done: false
  episode_len_mean: 353.0769230769231
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -35.38615384615407
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 65
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.576524758338928
          entropy_coeff: 0.009999999999999998
          kl: 0.011781017627391864
          policy_loss: 0.05378456918729676
          total_loss: 0.8107512268755172
          vf_explained_var: 0.26010239124298096
          vf_loss: 0.780375697877672
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,23,706.722,23000,-35.3862,-28.7,-43.1,353.077


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-28_21-18-10
  done: false
  episode_len_mean: 352.4117647058824
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -35.31617647058846
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 68
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.600353421105279
          entropy_coeff: 0.009999999999999998
          kl: 0.012612744740769226
          policy_loss: 0.03996453301774131
          total_loss: 0.9708688368399938
          vf_explained_var: 0.156180739402771
          vf_loss: 0.9543852945168813
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,24,729.446,24000,-35.3162,-28.7,-43.1,352.412


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-28_21-18-32
  done: false
  episode_len_mean: 352.4428571428571
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -35.317142857143075
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 2
  episodes_total: 70
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.586912965774536
          entropy_coeff: 0.009999999999999998
          kl: 0.014264358179773654
          policy_loss: -0.11692403985394371
          total_loss: 0.9041827539602916
          vf_explained_var: 0.3169472813606262
          vf_loss: 1.0441230718460348
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,25,751.339,25000,-35.3171,-28.7,-43.1,352.443


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-28_21-18-56
  done: false
  episode_len_mean: 352.26027397260276
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -35.29589041095913
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 73
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.535362301932441
          entropy_coeff: 0.009999999999999998
          kl: 0.011423184139340018
          policy_loss: -0.11194570983449618
          total_loss: 1.324067277378506
          vf_explained_var: 0.20366458594799042
          vf_loss: 1.4590819928381178
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,26,775.104,26000,-35.2959,-28.7,-43.1,352.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-28_21-19-19
  done: false
  episode_len_mean: 351.6578947368421
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -35.23289473684233
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 76
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5336759063932632
          entropy_coeff: 0.009999999999999998
          kl: 0.013569403444569004
          policy_loss: -0.10045093595981598
          total_loss: 1.0237824870480432
          vf_explained_var: 0.19680608808994293
          vf_loss: 1.1468562954001957
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,27,798.185,27000,-35.2329,-28.7,-43.1,351.658


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-28_21-19-42
  done: false
  episode_len_mean: 350.69620253164555
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -35.1341772151901
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 79
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.496522906091478
          entropy_coeff: 0.009999999999999998
          kl: 0.013713164482397053
          policy_loss: -0.14079705658886169
          total_loss: 1.2342474831475152
          vf_explained_var: 0.22220586240291595
          vf_loss: 1.3972671336597866
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,28,821.917,28000,-35.1342,-28.7,-43.1,350.696


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-28_21-20-04
  done: false
  episode_len_mean: 349.9268292682927
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -35.05487804878071
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 82
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4782706525590683
          entropy_coeff: 0.009999999999999998
          kl: 0.008922455155308068
          policy_loss: -0.13883881701363457
          total_loss: 1.144340576065911
          vf_explained_var: 0.3158629834651947
          vf_loss: 1.3061775975757175
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,29,843.799,29000,-35.0549,-28.7,-43.1,349.927


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-28_21-20-27
  done: false
  episode_len_mean: 349.5882352941176
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -35.01882352941198
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 85
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.44829212029775
          entropy_coeff: 0.009999999999999998
          kl: 0.011518112561324756
          policy_loss: -0.12454286093513171
          total_loss: 1.0705985850758022
          vf_explained_var: 0.3736623227596283
          vf_loss: 1.2173207494947644
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,30,866.318,30000,-35.0188,-28.7,-43.1,349.588


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-28_21-20-51
  done: false
  episode_len_mean: 348.77272727272725
  episode_media: {}
  episode_reward_max: -28.700000000000138
  episode_reward_mean: -34.9352272727275
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 88
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.385862686898973
          entropy_coeff: 0.009999999999999998
          kl: 0.009531052148777459
          policy_loss: -0.12302941944864061
          total_loss: 0.9566888795958625
          vf_explained_var: 0.4550526440143585
          vf_loss: 1.101670714881685
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,31,890.111,31000,-34.9352,-28.7,-43.1,348.773




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-28_21-21-32
  done: false
  episode_len_mean: 346.94565217391306
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -34.750000000000234
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 92
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.314171324835883
          entropy_coeff: 0.009999999999999998
          kl: 0.009045755616975527
          policy_loss: 0.05559062245819304
          total_loss: 1.0982172906398773
          vf_explained_var: 0.45310449600219727
          vf_loss: 1.0639592362774744
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,32,931.183,32000,-34.75,-26.9,-43.1,346.946


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-28_21-21-56
  done: false
  episode_len_mean: 346.05263157894734
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -34.65894736842129
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 95
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.319143544303046
          entropy_coeff: 0.009999999999999998
          kl: 0.009322570992289848
          policy_loss: 0.05685060587194231
          total_loss: 1.03943373825815
          vf_explained_var: 0.5169036984443665
          vf_loss: 1.0039100732240411
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,33,954.844,33000,-34.6589,-26.9,-43.1,346.053


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-28_21-22-19
  done: false
  episode_len_mean: 345.16326530612247
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -34.568367346939
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 98
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.415402804480659
          entropy_coeff: 0.009999999999999998
          kl: 0.01129275786223521
          policy_loss: 0.06335956305265426
          total_loss: 0.8950684517621994
          vf_explained_var: 0.6067785620689392
          vf_loss: 0.8536043503218227
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,34,978.159,34000,-34.5684,-26.9,-43.1,345.163


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-28_21-22-42
  done: false
  episode_len_mean: 343.8
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -34.43100000000021
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 101
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.316639200846354
          entropy_coeff: 0.009999999999999998
          kl: 0.006280942947079874
          policy_loss: -0.02230103976196713
          total_loss: 0.7306026051441828
          vf_explained_var: 0.6517929434776306
          vf_loss: 0.774813846581512
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,35,1001.65,35000,-34.431,-26.9,-43.1,343.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-28_21-23-06
  done: false
  episode_len_mean: 341.69
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -34.19000000000021
  episode_reward_min: -41.30000000000032
  episodes_this_iter: 3
  episodes_total: 104
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.265788515408834
          entropy_coeff: 0.009999999999999998
          kl: 0.011522702172233482
          policy_loss: -0.07669436410069466
          total_loss: 1.0105749587217967
          vf_explained_var: 0.5423334836959839
          vf_loss: 1.1076226671536764
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,36,1025.62,36000,-34.19,-26.9,-41.3,341.69


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-28_21-23-31
  done: false
  episode_len_mean: 338.13
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -33.81300000000021
  episode_reward_min: -40.500000000000306
  episodes_this_iter: 4
  episodes_total: 108
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.2476743910047743
          entropy_coeff: 0.009999999999999998
          kl: 0.005196237815781846
          policy_loss: -0.0827462355295817
          total_loss: 0.773222475581699
          vf_explained_var: 0.686791181564331
          vf_loss: 0.8774062077204386
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,37,1050.24,37000,-33.813,-26.9,-40.5,338.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-28_21-23-55
  done: false
  episode_len_mean: 335.45
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -33.54500000000021
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 111
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3108951409657794
          entropy_coeff: 0.009999999999999998
          kl: 0.004992610996819962
          policy_loss: -0.046190379725562204
          total_loss: 0.2781358497010337
          vf_explained_var: 0.8663334250450134
          vf_loss: 0.3464366601573096
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,38,1073.97,38000,-33.545,-26.9,-39,335.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-28_21-24-19
  done: false
  episode_len_mean: 333.6
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -33.3600000000002
  episode_reward_min: -38.00000000000027
  episodes_this_iter: 3
  episodes_total: 114
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2889987760119967
          entropy_coeff: 0.009999999999999998
          kl: 0.01155382212085689
          policy_loss: 0.039248537520567575
          total_loss: 0.49229744176069895
          vf_explained_var: 0.8076778650283813
          vf_loss: 0.4747835099697113
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,39,1098.13,39000,-33.36,-26.9,-38,333.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-28_21-24-41
  done: false
  episode_len_mean: 332.02
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -33.2020000000002
  episode_reward_min: -38.00000000000027
  episodes_this_iter: 3
  episodes_total: 117
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3351030376222397
          entropy_coeff: 0.009999999999999998
          kl: 0.016542696374362363
          policy_loss: 0.05393543508317736
          total_loss: 0.8589701731999715
          vf_explained_var: 0.5844866037368774
          vf_loss: 0.826731503340933
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,40,1120.37,40000,-33.202,-26.9,-38,332.02




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-28_21-25-21
  done: false
  episode_len_mean: 330.85
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -33.08500000000021
  episode_reward_min: -38.00000000000027
  episodes_this_iter: 3
  episodes_total: 120
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.3954852342605593
          entropy_coeff: 0.009999999999999998
          kl: 0.010149723037187879
          policy_loss: -0.053198804871903524
          total_loss: 1.1060489343272315
          vf_explained_var: 0.36552339792251587
          vf_loss: 1.1821876088778178
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,41,1160.28,41000,-33.085,-26.9,-38,330.85


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-28_21-25-43
  done: false
  episode_len_mean: 330.4
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -33.0400000000002
  episode_reward_min: -38.00000000000027
  episodes_this_iter: 3
  episodes_total: 123
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2937594095865887
          entropy_coeff: 0.009999999999999998
          kl: 0.010991536286649426
          policy_loss: 0.015097550716665057
          total_loss: 1.037689787811703
          vf_explained_var: 0.17967063188552856
          vf_loss: 1.0444306805729866
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,42,1182.62,42000,-33.04,-26.9,-38,330.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-28_21-26-06
  done: false
  episode_len_mean: 329.66
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.9660000000002
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 3
  episodes_total: 126
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.322107508447435
          entropy_coeff: 0.009999999999999998
          kl: 0.011247740704631286
          policy_loss: 0.04283268286122216
          total_loss: 1.0417325880792405
          vf_explained_var: 0.20673932135105133
          vf_loss: 1.0209962016178502
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,43,1204.71,43000,-32.966,-26.9,-36.7,329.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-28_21-26-27
  done: false
  episode_len_mean: 329.52
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.952000000000204
  episode_reward_min: -37.50000000000026
  episodes_this_iter: 3
  episodes_total: 129
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.297009547551473
          entropy_coeff: 0.009999999999999998
          kl: 0.010511895241275226
          policy_loss: 0.046049951679176755
          total_loss: 1.123731396595637
          vf_explained_var: 0.16700918972492218
          vf_loss: 1.0996003599216542
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,44,1225.9,44000,-32.952,-26.9,-37.5,329.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-28_21-26-49
  done: false
  episode_len_mean: 329.92
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.992000000000196
  episode_reward_min: -37.50000000000026
  episodes_this_iter: 3
  episodes_total: 132
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1703654368718466
          entropy_coeff: 0.009999999999999998
          kl: 0.012042939792399803
          policy_loss: 0.05162454040514098
          total_loss: 1.1610444234477149
          vf_explained_var: 0.2087228000164032
          vf_loss: 1.1299192432728078
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,45,1248.49,45000,-32.992,-26.9,-37.5,329.92


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-28_21-27-12
  done: false
  episode_len_mean: 329.73
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.973000000000205
  episode_reward_min: -37.50000000000026
  episodes_this_iter: 3
  episodes_total: 135
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1465339766608342
          entropy_coeff: 0.009999999999999998
          kl: 0.011357304614979412
          policy_loss: 0.04350736373000675
          total_loss: 1.1642110678884718
          vf_explained_var: 0.03678888455033302
          vf_loss: 1.141033293803533
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,46,1270.94,46000,-32.973,-26.9,-37.5,329.73


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-28_21-27-32
  done: false
  episode_len_mean: 330.65
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -33.0650000000002
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 2
  episodes_total: 137
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1311879211001927
          entropy_coeff: 0.009999999999999998
          kl: 0.014068092791719848
          policy_loss: -0.10393086042669085
          total_loss: 1.0696251852644814
          vf_explained_var: 0.21020211279392242
          vf_loss: 1.1934611139198144
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,47,1291.34,47000,-33.065,-26.9,-38.6,330.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-28_21-27-55
  done: false
  episode_len_mean: 330.65
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -33.0650000000002
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 140
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0551352898279824
          entropy_coeff: 0.009999999999999998
          kl: 0.011549218671625264
          policy_loss: -0.11782663481103049
          total_loss: 1.4893811610009935
          vf_explained_var: 0.09883087128400803
          vf_loss: 1.6266042047076754
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,48,1314.16,48000,-33.065,-26.9,-38.6,330.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-28_21-28-17
  done: false
  episode_len_mean: 331.44
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -33.1440000000002
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 143
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9703804320759244
          entropy_coeff: 0.009999999999999998
          kl: 0.013377031184676645
          policy_loss: -0.11820833467774922
          total_loss: 1.373376226425171
          vf_explained_var: 0.17115427553653717
          vf_loss: 1.5099506629837884
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,49,1336.22,49000,-33.144,-26.9,-38.6,331.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-28_21-28-41
  done: false
  episode_len_mean: 331.16
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -33.116000000000206
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 146
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8696021556854248
          entropy_coeff: 0.009999999999999998
          kl: 0.014037828844636656
          policy_loss: -0.13250942279895148
          total_loss: 1.1915510323312548
          vf_explained_var: 0.2539052665233612
          vf_loss: 1.3413526932398478
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,50,1360,50000,-33.116,-26.9,-38.6,331.16




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-28_21-29-20
  done: false
  episode_len_mean: 329.85
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.9850000000002
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 150
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8239684383074442
          entropy_coeff: 0.009999999999999998
          kl: 0.012617039501294119
          policy_loss: 0.03286507046884961
          total_loss: 1.0542195985714595
          vf_explained_var: 0.48206764459609985
          vf_loss: 1.0383325063520008
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,51,1399.3,51000,-32.985,-26.9,-38.6,329.85


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-28_21-29-46
  done: false
  episode_len_mean: 329.13
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.913000000000196
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 153
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9964495129055446
          entropy_coeff: 0.009999999999999998
          kl: 0.011874659164995234
          policy_loss: 0.017354031403859455
          total_loss: 0.7451000223557155
          vf_explained_var: 0.7342025637626648
          vf_loss: 0.7465230176846186
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,52,1424.8,52000,-32.913,-26.9,-38.6,329.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-28_21-30-09
  done: false
  episode_len_mean: 328.62
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.862000000000194
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 156
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.107845669322544
          entropy_coeff: 0.009999999999999998
          kl: 0.007151672995552764
          policy_loss: -0.0014792045785321129
          total_loss: 0.6661411997344758
          vf_explained_var: 0.7258931398391724
          vf_loss: 0.6879836902022362
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,53,1448.26,53000,-32.862,-26.9,-38.6,328.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-28_21-30-32
  done: false
  episode_len_mean: 328.11
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.81100000000019
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 159
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1772134913338554
          entropy_coeff: 0.009999999999999998
          kl: 0.009707941240126791
          policy_loss: -0.079423545466529
          total_loss: 0.5649584664238824
          vf_explained_var: 0.7980865240097046
          vf_loss: 0.6651833570665784
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,54,1471.23,54000,-32.811,-26.9,-38.6,328.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-28_21-30-56
  done: false
  episode_len_mean: 327.6
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.76000000000019
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 162
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.106652002864414
          entropy_coeff: 0.009999999999999998
          kl: 0.011290229559602436
          policy_loss: -0.06917441657019986
          total_loss: 0.9080061283376482
          vf_explained_var: 0.6376566290855408
          vf_loss: 0.9971180333031548
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,55,1494.74,55000,-32.76,-26.9,-38.6,327.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-10-28_21-31-21
  done: false
  episode_len_mean: 326.3
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.630000000000194
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 166
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.054002579053243
          entropy_coeff: 0.009999999999999998
          kl: 0.0075288117316086
          policy_loss: 0.05664585563871596
          total_loss: 0.6924005389213562
          vf_explained_var: 0.7800561785697937
          vf_loss: 0.6555418309238222
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,56,1519.42,56000,-32.63,-26.9,-38.6,326.3


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-10-28_21-31-45
  done: false
  episode_len_mean: 324.76
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.47600000000019
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 169
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2132363080978394
          entropy_coeff: 0.009999999999999998
          kl: 0.016534643633091012
          policy_loss: 0.03881730834643046
          total_loss: 0.5733508153094186
          vf_explained_var: 0.8115670680999756
          vf_loss: 0.5550124045047495
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,57,1544.14,57000,-32.476,-26.9,-38.6,324.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-10-28_21-32-08
  done: false
  episode_len_mean: 324.12
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.41200000000019
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 172
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.247105089823405
          entropy_coeff: 0.009999999999999998
          kl: 0.01171786271798866
          policy_loss: 0.017912277579307558
          total_loss: 1.1015943454371557
          vf_explained_var: 0.2965754270553589
          vf_loss: 1.104981346262826
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,58,1566.93,58000,-32.412,-26.9,-38.6,324.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-10-28_21-32-30
  done: false
  episode_len_mean: 324.11
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.4110000000002
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 175
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.246776843070984
          entropy_coeff: 0.009999999999999998
          kl: 0.0072900074420071135
          policy_loss: 0.0008559289077917734
          total_loss: 0.9580999778376685
          vf_explained_var: 0.3484567403793335
          vf_loss: 0.9789828037222227
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,59,1588.29,59000,-32.411,-26.9,-38.6,324.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-10-28_21-32-52
  done: false
  episode_len_mean: 324.48
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.4480000000002
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 178
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2088678068584864
          entropy_coeff: 0.009999999999999998
          kl: 0.007498480679520409
          policy_loss: 0.06809613439771864
          total_loss: 1.0464882360564338
          vf_explained_var: 0.3593083322048187
          vf_loss: 0.999730952994691
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,60,1610.44,60000,-32.448,-26.9,-38.6,324.48




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-10-28_21-33-32
  done: false
  episode_len_mean: 324.45
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.44500000000019
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 181
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.2296992037031385
          entropy_coeff: 0.009999999999999998
          kl: 0.011150019255723306
          policy_loss: 0.025965088771449193
          total_loss: 1.063981181383133
          vf_explained_var: 0.3062972128391266
          vf_loss: 1.0591980909307799
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,61,1651.16,61000,-32.445,-26.9,-38.6,324.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-10-28_21-33-57
  done: false
  episode_len_mean: 323.81
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.38100000000019
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 184
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.243997155295478
          entropy_coeff: 0.009999999999999998
          kl: 0.01161126878260461
          policy_loss: -0.09301814908782641
          total_loss: 1.4477677716149224
          vf_explained_var: 0.08091127872467041
          vf_loss: 1.5620647695329455
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,62,1675.63,62000,-32.381,-26.9,-38.6,323.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-10-28_21-34-20
  done: false
  episode_len_mean: 323.66
  episode_media: {}
  episode_reward_max: -26.900000000000112
  episode_reward_mean: -32.36600000000019
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 187
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.209180410703023
          entropy_coeff: 0.009999999999999998
          kl: 0.014585022349046796
          policy_loss: -0.11360630906290478
          total_loss: 1.4463381475872463
          vf_explained_var: 0.0787678062915802
          vf_loss: 1.5805777629216513
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,63,1699.05,63000,-32.366,-26.9,-38.6,323.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-10-28_21-34-44
  done: false
  episode_len_mean: 323.6
  episode_media: {}
  episode_reward_max: -27.000000000000114
  episode_reward_mean: -32.360000000000184
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 191
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1835749414232044
          entropy_coeff: 0.009999999999999998
          kl: 0.01434731449241795
          policy_loss: -0.00785576601823171
          total_loss: 1.4574213027954102
          vf_explained_var: 0.11823359876871109
          vf_loss: 1.4856780860159131
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,64,1722.98,64000,-32.36,-27,-38.6,323.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-10-28_21-35-10
  done: false
  episode_len_mean: 322.85
  episode_media: {}
  episode_reward_max: -27.000000000000114
  episode_reward_mean: -32.28500000000019
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 194
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1637867079840767
          entropy_coeff: 0.009999999999999998
          kl: 0.011987306199656168
          policy_loss: 0.045306528939141166
          total_loss: 0.9864568481842677
          vf_explained_var: 0.26460185647010803
          vf_loss: 0.9615894357363383
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,65,1748.8,65000,-32.285,-27,-38.6,322.85


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-10-28_21-35-35
  done: false
  episode_len_mean: 322.34
  episode_media: {}
  episode_reward_max: -27.000000000000114
  episode_reward_mean: -32.234000000000194
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 197
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.14357713593377
          entropy_coeff: 0.009999999999999998
          kl: 0.012934766899470576
          policy_loss: -0.10519245854682392
          total_loss: 1.340475114186605
          vf_explained_var: 0.17202036082744598
          vf_loss: 1.465809883011712
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,66,1773.98,66000,-32.234,-27,-38.6,322.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-10-28_21-36-01
  done: false
  episode_len_mean: 321.33
  episode_media: {}
  episode_reward_max: -27.000000000000114
  episode_reward_mean: -32.13300000000019
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 201
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.1319677856233383
          entropy_coeff: 0.009999999999999998
          kl: 0.00858089145582029
          policy_loss: 0.02713982197973463
          total_loss: 1.1041377584139507
          vf_explained_var: 0.4069058299064636
          vf_loss: 1.0974595215585496
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,67,1799.36,67000,-32.133,-27,-38.6,321.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-10-28_21-36-26
  done: false
  episode_len_mean: 320.19
  episode_media: {}
  episode_reward_max: -27.000000000000114
  episode_reward_mean: -32.01900000000018
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 204
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.084461294280158
          entropy_coeff: 0.009999999999999998
          kl: 0.012333820173348005
          policy_loss: -0.015023042261600495
          total_loss: 0.6828117549419404
          vf_explained_var: 0.6611867547035217
          vf_loss: 0.7174460169341829
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,68,1824.85,68000,-32.019,-27,-38.6,320.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-10-28_21-36-52
  done: false
  episode_len_mean: 319.89
  episode_media: {}
  episode_reward_max: -27.000000000000114
  episode_reward_mean: -31.98900000000019
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 208
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.048669698503282
          entropy_coeff: 0.009999999999999998
          kl: 0.0064668739418678855
          policy_loss: -0.0008310637540287441
          total_loss: 0.7736429830392202
          vf_explained_var: 0.7340058088302612
          vf_loss: 0.79431405266126
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,69,1850.11,69000,-31.989,-27,-38.6,319.89




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-10-28_21-37-32
  done: false
  episode_len_mean: 319.51
  episode_media: {}
  episode_reward_max: -26.1000000000001
  episode_reward_mean: -31.95100000000018
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 211
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.046265729268392
          entropy_coeff: 0.009999999999999998
          kl: 0.01845364082580853
          policy_loss: 0.08654147022300296
          total_loss: 1.249888100557857
          vf_explained_var: 0.5358062982559204
          vf_loss: 1.1819639374812445
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,70,1891.01,70000,-31.951,-26.1,-38.6,319.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-10-28_21-37-57
  done: false
  episode_len_mean: 318.44
  episode_media: {}
  episode_reward_max: -26.1000000000001
  episode_reward_mean: -31.844000000000182
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 215
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9271530204349094
          entropy_coeff: 0.009999999999999998
          kl: 0.007592028224226905
          policy_loss: 0.0016923289332124922
          total_loss: 0.7028144220511119
          vf_explained_var: 0.6953128576278687
          vf_loss: 0.7196344137191772
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,71,1915.24,71000,-31.844,-26.1,-38.6,318.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-10-28_21-38-22
  done: false
  episode_len_mean: 317.71
  episode_media: {}
  episode_reward_max: -26.1000000000001
  episode_reward_mean: -31.771000000000186
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 218
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0188391539785595
          entropy_coeff: 0.009999999999999998
          kl: 0.00978570739517555
          policy_loss: 0.01840096985300382
          total_loss: 0.8226213862498601
          vf_explained_var: 0.6891359686851501
          vf_loss: 0.8234302357873983
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,72,1940.07,72000,-31.771,-26.1,-38.6,317.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-10-28_21-38-47
  done: false
  episode_len_mean: 316.47
  episode_media: {}
  episode_reward_max: -26.1000000000001
  episode_reward_mean: -31.647000000000176
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 221
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0848758512073093
          entropy_coeff: 0.009999999999999998
          kl: 0.0138860835246021
          policy_loss: -0.12826819337076611
          total_loss: 0.5767209043105443
          vf_explained_var: 0.7431597113609314
          vf_loss: 0.7244492464595371
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,73,1965.03,73000,-31.647,-26.1,-38.6,316.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-10-28_21-39-10
  done: false
  episode_len_mean: 314.77
  episode_media: {}
  episode_reward_max: -26.1000000000001
  episode_reward_mean: -31.477000000000174
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 225
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.056730474366082
          entropy_coeff: 0.009999999999999998
          kl: 0.0122086304651256
          policy_loss: -0.03600456652541955
          total_loss: 1.5133572949303522
          vf_explained_var: 0.35742318630218506
          vf_loss: 1.5687082899941338
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,74,1988.81,74000,-31.477,-26.1,-38.6,314.77


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-28_21-39-33
  done: false
  episode_len_mean: 313.73
  episode_media: {}
  episode_reward_max: -26.1000000000001
  episode_reward_mean: -31.37300000000017
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 228
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0611247486538358
          entropy_coeff: 0.009999999999999998
          kl: 0.009065873660147578
          policy_loss: 0.08378639900022083
          total_loss: 0.9272942490047879
          vf_explained_var: 0.5267271995544434
          vf_loss: 0.8632125051485168
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,75,2011.86,75000,-31.373,-26.1,-38.6,313.73


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-10-28_21-39-57
  done: false
  episode_len_mean: 313.03
  episode_media: {}
  episode_reward_max: -26.1000000000001
  episode_reward_mean: -31.303000000000175
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 231
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.0087525380982294
          entropy_coeff: 0.009999999999999998
          kl: 0.009360404005885274
          policy_loss: 0.053752438227335615
          total_loss: 0.9391558607419331
          vf_explained_var: 0.4411328136920929
          vf_loss: 0.9045549041695065
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,76,2035.02,76000,-31.303,-26.1,-38.6,313.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-10-28_21-40-21
  done: false
  episode_len_mean: 312.16
  episode_media: {}
  episode_reward_max: -26.1000000000001
  episode_reward_mean: -31.216000000000175
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 234
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.03841835392846
          entropy_coeff: 0.009999999999999998
          kl: 0.01030853786306933
          policy_loss: -0.026000821590423585
          total_loss: 1.1126017755932278
          vf_explained_var: 0.4032350182533264
          vf_loss: 1.1579559326999718
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,77,2059.68,77000,-31.216,-26.1,-38.6,312.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-10-28_21-40-47
  done: false
  episode_len_mean: 309.54
  episode_media: {}
  episode_reward_max: -26.1000000000001
  episode_reward_mean: -30.954000000000168
  episode_reward_min: -35.500000000000234
  episodes_this_iter: 4
  episodes_total: 238
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9474869158532884
          entropy_coeff: 0.009999999999999998
          kl: 0.011962337334928266
          policy_loss: 0.0103246727751361
          total_loss: 1.2994773719045851
          vf_explained_var: 0.30687180161476135
          vf_loss: 1.3074313441912333
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,78,2084.93,78000,-30.954,-26.1,-35.5,309.54




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-10-28_21-41-30
  done: false
  episode_len_mean: 308.08
  episode_media: {}
  episode_reward_max: -24.70000000000008
  episode_reward_mean: -30.80800000000017
  episode_reward_min: -35.500000000000234
  episodes_this_iter: 3
  episodes_total: 241
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9059288342793783
          entropy_coeff: 0.009999999999999998
          kl: 0.0094691784709285
          policy_loss: 0.03595207565360599
          total_loss: 1.1772925764322282
          vf_explained_var: 0.43509000539779663
          vf_loss: 1.1594528726405569
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,79,2128.34,79000,-30.808,-24.7,-35.5,308.08


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-10-28_21-41-56
  done: false
  episode_len_mean: 306.16
  episode_media: {}
  episode_reward_max: -24.70000000000008
  episode_reward_mean: -30.616000000000167
  episode_reward_min: -35.500000000000234
  episodes_this_iter: 4
  episodes_total: 245
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7847891608874003
          entropy_coeff: 0.009999999999999998
          kl: 0.015280786265752511
          policy_loss: -0.009971703754531012
          total_loss: 1.4459912525282965
          vf_explained_var: 0.19003255665302277
          vf_loss: 1.4722827580240039
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,80,2154.56,80000,-30.616,-24.7,-35.5,306.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-10-28_21-42-22
  done: false
  episode_len_mean: 304.81
  episode_media: {}
  episode_reward_max: -24.70000000000008
  episode_reward_mean: -30.48100000000016
  episode_reward_min: -35.500000000000234
  episodes_this_iter: 3
  episodes_total: 248
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8473067985640632
          entropy_coeff: 0.009999999999999998
          kl: 0.006868631898371704
          policy_loss: 0.02705937491522895
          total_loss: 1.0594288948509427
          vf_explained_var: 0.33079782128334045
          vf_loss: 1.0501557138231066
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,81,2180.49,81000,-30.481,-24.7,-35.5,304.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-10-28_21-42-49
  done: false
  episode_len_mean: 304.1
  episode_media: {}
  episode_reward_max: -24.70000000000008
  episode_reward_mean: -30.410000000000164
  episode_reward_min: -35.500000000000234
  episodes_this_iter: 4
  episodes_total: 252
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8539615710576376
          entropy_coeff: 0.009999999999999998
          kl: 0.009343481066386399
          policy_loss: 0.012063627607292599
          total_loss: 1.169883175690969
          vf_explained_var: 0.44903483986854553
          vf_loss: 1.175424814886517
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,82,2207.18,82000,-30.41,-24.7,-35.5,304.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-10-28_21-43-15
  done: false
  episode_len_mean: 302.8
  episode_media: {}
  episode_reward_max: -24.70000000000008
  episode_reward_mean: -30.280000000000157
  episode_reward_min: -35.500000000000234
  episodes_this_iter: 3
  episodes_total: 255
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6673126697540284
          entropy_coeff: 0.009999999999999998
          kl: 0.014636657177723241
          policy_loss: -0.11059291917416784
          total_loss: 1.0673136121696896
          vf_explained_var: 0.47871288657188416
          vf_loss: 1.1931159814198813
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,83,2233.64,83000,-30.28,-24.7,-35.5,302.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-10-28_21-43-45
  done: false
  episode_len_mean: 300.86
  episode_media: {}
  episode_reward_max: -24.70000000000008
  episode_reward_mean: -30.08600000000016
  episode_reward_min: -35.500000000000234
  episodes_this_iter: 4
  episodes_total: 259
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6159126785066393
          entropy_coeff: 0.009999999999999998
          kl: 0.00477757113003913
          policy_loss: -0.009085914823744032
          total_loss: 0.9430643002192179
          vf_explained_var: 0.6476970314979553
          vf_loss: 0.9678315831555261
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,84,2263.62,84000,-30.086,-24.7,-35.5,300.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-10-28_21-44-13
  done: false
  episode_len_mean: 298.64
  episode_media: {}
  episode_reward_max: -24.70000000000008
  episode_reward_mean: -29.86400000000015
  episode_reward_min: -35.500000000000234
  episodes_this_iter: 4
  episodes_total: 263
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5850182268354627
          entropy_coeff: 0.009999999999999998
          kl: 0.00896083318446017
          policy_loss: 0.038951287004682755
          total_loss: 0.819143416484197
          vf_explained_var: 0.7318225502967834
          vf_loss: 0.7955942703617944
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,85,2291.27,85000,-29.864,-24.7,-35.5,298.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-10-28_21-44-40
  done: false
  episode_len_mean: 297.52
  episode_media: {}
  episode_reward_max: -24.70000000000008
  episode_reward_mean: -29.752000000000148
  episode_reward_min: -35.500000000000234
  episodes_this_iter: 4
  episodes_total: 267
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6992101788520813
          entropy_coeff: 0.009999999999999998
          kl: 0.008825904136542403
          policy_loss: 0.08554222799009747
          total_loss: 0.5313386672072941
          vf_explained_var: 0.8895225524902344
          vf_loss: 0.46234724753432804
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,86,2318.27,86000,-29.752,-24.7,-35.5,297.52




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-10-28_21-45-23
  done: false
  episode_len_mean: 296.43
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -29.643000000000153
  episode_reward_min: -35.500000000000234
  episodes_this_iter: 3
  episodes_total: 270
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6597700542873806
          entropy_coeff: 0.009999999999999998
          kl: 0.020374154069834324
          policy_loss: -0.0862721410062578
          total_loss: 0.26515325779716176
          vf_explained_var: 0.9078719019889832
          vf_loss: 0.3670043942001131
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,87,2361.56,87000,-29.643,-24.6,-35.5,296.43


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-10-28_21-45-51
  done: false
  episode_len_mean: 294.31
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -29.43100000000015
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 274
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.075
          cur_lr: 5.000000000000001e-05
          entropy: 1.6251319845517476
          entropy_coeff: 0.009999999999999998
          kl: 0.03236235729911685
          policy_loss: 0.0497599425415198
          total_loss: 1.6364855700069003
          vf_explained_var: 0.738978385925293
          vf_loss: 1.6005497574806213
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_since_restore: 88

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,88,2388.79,88000,-29.431,-24.6,-35.1,294.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-10-28_21-46-15
  done: false
  episode_len_mean: 293.01
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -29.301000000000148
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 3
  episodes_total: 277
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11250000000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.757904162671831
          entropy_coeff: 0.009999999999999998
          kl: 0.015549222371930287
          policy_loss: 0.06584378563695484
          total_loss: 1.1289044962988959
          vf_explained_var: 0.49162593483924866
          vf_loss: 1.0788904673523374
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,89,2413.13,89000,-29.301,-24.6,-34,293.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-10-28_21-46-37
  done: false
  episode_len_mean: 292.51
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -29.251000000000143
  episode_reward_min: -33.1000000000002
  episodes_this_iter: 3
  episodes_total: 280
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11250000000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.826126562224494
          entropy_coeff: 0.009999999999999998
          kl: 0.011681541470445072
          policy_loss: -0.07615437987777922
          total_loss: 1.481295390923818
          vf_explained_var: 0.2378678023815155
          vf_loss: 1.5743968605995178
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,90,2435.01,90000,-29.251,-24.6,-33.1,292.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-10-28_21-47-00
  done: false
  episode_len_mean: 292.4
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -29.24000000000015
  episode_reward_min: -33.1000000000002
  episodes_this_iter: 4
  episodes_total: 284
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11250000000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8847602393892076
          entropy_coeff: 0.009999999999999998
          kl: 0.012665525144391529
          policy_loss: -0.025138775010903676
          total_loss: 1.5485052439901563
          vf_explained_var: 0.1907222718000412
          vf_loss: 1.5910667366451687
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,91,2458.23,91000,-29.24,-24.6,-33.1,292.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-10-28_21-47-23
  done: false
  episode_len_mean: 292.58
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -29.25800000000015
  episode_reward_min: -34.60000000000022
  episodes_this_iter: 3
  episodes_total: 287
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11250000000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8811944484710694
          entropy_coeff: 0.009999999999999998
          kl: 0.010075181485177085
          policy_loss: 0.013235549132029215
          total_loss: 1.147181487745709
          vf_explained_var: 0.2754970192909241
          vf_loss: 1.1516244214442042
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,92,2480.79,92000,-29.258,-24.6,-34.6,292.58


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-10-28_21-47-46
  done: false
  episode_len_mean: 293.44
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -29.344000000000147
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 3
  episodes_total: 290
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11250000000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.827814519405365
          entropy_coeff: 0.009999999999999998
          kl: 0.012610954164529096
          policy_loss: 0.01687854006886482
          total_loss: 1.1151180393166011
          vf_explained_var: 0.23999549448490143
          vf_loss: 1.1150989015069273
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,93,2504.09,93000,-29.344,-24.6,-34.9,293.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-10-28_21-48-07
  done: false
  episode_len_mean: 294.34
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -29.434000000000147
  episode_reward_min: -37.50000000000026
  episodes_this_iter: 2
  episodes_total: 292
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11250000000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.8298054708374871
          entropy_coeff: 0.009999999999999998
          kl: 0.006617720091605737
          policy_loss: -0.09398132032818264
          total_loss: 1.0457977874411477
          vf_explained_var: 0.16513413190841675
          vf_loss: 1.1573326822784211
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,94,2525.17,94000,-29.434,-24.6,-37.5,294.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-10-28_21-48-27
  done: false
  episode_len_mean: 296.91
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -29.691000000000145
  episode_reward_min: -39.700000000000294
  episodes_this_iter: 3
  episodes_total: 295
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11250000000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.870252948337131
          entropy_coeff: 0.009999999999999998
          kl: 0.007156639056436376
          policy_loss: 0.039652263124783836
          total_loss: 1.1856385366784201
          vf_explained_var: -0.008374584838747978
          vf_loss: 1.1638836773733299
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,95,2545.07,95000,-29.691,-24.6,-39.7,296.91


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-10-28_21-48-49
  done: false
  episode_len_mean: 298.36
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -29.836000000000148
  episode_reward_min: -39.700000000000294
  episodes_this_iter: 3
  episodes_total: 298
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11250000000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.7699098467826844
          entropy_coeff: 0.009999999999999998
          kl: 0.012510474422181951
          policy_loss: 0.04135376777913835
          total_loss: 1.2117097000281016
          vf_explained_var: 0.026414325460791588
          vf_loss: 1.1866476001011002
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,96,2567.19,96000,-29.836,-24.6,-39.7,298.36




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-10-28_21-49-27
  done: false
  episode_len_mean: 300.37
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -30.03700000000015
  episode_reward_min: -39.700000000000294
  episodes_this_iter: 3
  episodes_total: 301
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11250000000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6686962922414144
          entropy_coeff: 0.009999999999999998
          kl: 0.010900167306633204
          policy_loss: 0.04859669125742382
          total_loss: 1.245469590028127
          vf_explained_var: 0.13756252825260162
          vf_loss: 1.2123335767537355
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,97,2604.85,97000,-30.037,-24.6,-39.7,300.37


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-10-28_21-49-46
  done: false
  episode_len_mean: 302.73
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -30.273000000000156
  episode_reward_min: -41.30000000000032
  episodes_this_iter: 2
  episodes_total: 303
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11250000000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6137212991714478
          entropy_coeff: 0.009999999999999998
          kl: 0.009069889655686605
          policy_loss: -0.08092884139882194
          total_loss: 1.1126542846361795
          vf_explained_var: 0.024448858574032784
          vf_loss: 1.208699972844786
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,98,2623.99,98000,-30.273,-24.6,-41.3,302.73


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-10-28_21-50-04
  done: false
  episode_len_mean: 305.26
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -30.526000000000163
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 2
  episodes_total: 305
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11250000000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.5424607396125793
          entropy_coeff: 0.009999999999999998
          kl: 0.009122958118470829
          policy_loss: -0.06792881538470587
          total_loss: 0.8552342302269406
          vf_explained_var: 0.011960729025304317
          vf_loss: 0.9375613075784511
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,99,2642.11,99000,-30.526,-24.6,-42.5,305.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-10-28_21-50-21
  done: false
  episode_len_mean: 309.89
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -30.989000000000175
  episode_reward_min: -46.30000000000039
  episodes_this_iter: 3
  episodes_total: 308
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11250000000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.6057511885960898
          entropy_coeff: 0.009999999999999998
          kl: 0.007705064473607
          policy_loss: 0.048720718423525496
          total_loss: 1.2591726899147033
          vf_explained_var: 0.0568501353263855
          vf_loss: 1.22564266944925
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,100,2659.2,100000,-30.989,-24.6,-46.3,309.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-10-28_21-50-40
  done: false
  episode_len_mean: 312.53
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -31.253000000000174
  episode_reward_min: -46.30000000000039
  episodes_this_iter: 2
  episodes_total: 310
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11250000000000004
          cur_lr: 5.000000000000001e-05
          entropy: 1.532105767726898
          entropy_coeff: 0.009999999999999998
          kl: 0.004545512291356325
          policy_loss: 0.06390112472905053
          total_loss: 0.7424301140838199
          vf_explained_var: 0.06337295472621918
          vf_loss: 0.6933386698572173
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,101,2677.68,101000,-31.253,-24.6,-46.3,312.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-10-28_21-50-58
  done: false
  episode_len_mean: 314.67
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -31.46700000000018
  episode_reward_min: -46.30000000000039
  episodes_this_iter: 2
  episodes_total: 312
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05625000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.480337761508094
          entropy_coeff: 0.009999999999999998
          kl: 0.009539083940458997
          policy_loss: -0.05068558752536774
          total_loss: 0.8070943224761221
          vf_explained_var: 0.05817675590515137
          vf_loss: 0.872046709071017
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,102,2696.21,102000,-31.467,-24.6,-46.3,314.67


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-10-28_21-51-15
  done: false
  episode_len_mean: 319.15
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -31.915000000000187
  episode_reward_min: -46.30000000000039
  episodes_this_iter: 3
  episodes_total: 315
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05625000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 1.5215475373797946
          entropy_coeff: 0.009999999999999998
          kl: 0.0041702052733001375
          policy_loss: 0.06534396352039443
          total_loss: 1.310001058710946
          vf_explained_var: -0.14942820370197296
          vf_loss: 1.2596379935327504
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,103,2712.52,103000,-31.915,-24.6,-46.3,319.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-10-28_21-51-33
  done: false
  episode_len_mean: 321.87
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -32.18700000000019
  episode_reward_min: -46.30000000000039
  episodes_this_iter: 2
  episodes_total: 317
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02812500000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4801391336652967
          entropy_coeff: 0.009999999999999998
          kl: 0.008967011589600895
          policy_loss: -0.04129387603865729
          total_loss: 1.0659564233488508
          vf_explained_var: -0.3131033480167389
          vf_loss: 1.1217995074060227
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,104,2731.08,104000,-32.187,-24.6,-46.3,321.87


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-10-28_21-51-51
  done: false
  episode_len_mean: 324.93
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -32.493000000000194
  episode_reward_min: -46.30000000000039
  episodes_this_iter: 2
  episodes_total: 319
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02812500000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3648130350642733
          entropy_coeff: 0.009999999999999998
          kl: 0.011514546656249456
          policy_loss: -0.09604615138636695
          total_loss: 1.2637238009108438
          vf_explained_var: -0.01013581920415163
          vf_loss: 1.373094236602386
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,105,2748.63,105000,-32.493,-24.6,-46.3,324.93


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-10-28_21-52-08
  done: false
  episode_len_mean: 328.22
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -32.822000000000195
  episode_reward_min: -48.200000000000415
  episodes_this_iter: 2
  episodes_total: 321
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02812500000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4325347304344178
          entropy_coeff: 0.009999999999999998
          kl: 0.006879793196777702
          policy_loss: -0.09503273997041914
          total_loss: 1.221828387512101
          vf_explained_var: 0.07931005954742432
          vf_loss: 1.3309929895732138
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,106,2765.41,106000,-32.822,-24.6,-48.2,328.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-10-28_21-52-24
  done: false
  episode_len_mean: 332.86
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -33.2860000000002
  episode_reward_min: -49.10000000000043
  episodes_this_iter: 3
  episodes_total: 324
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02812500000000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3920217752456665
          entropy_coeff: 0.009999999999999998
          kl: 0.002676385674520349
          policy_loss: 0.04716867953538895
          total_loss: 1.3327257924609714
          vf_explained_var: 0.061334311962127686
          vf_loss: 1.299402070707745
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,107,2782.02,107000,-33.286,-24.6,-49.1,332.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-10-28_21-52-41
  done: false
  episode_len_mean: 336.66
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -33.66600000000021
  episode_reward_min: -50.90000000000045
  episodes_this_iter: 2
  episodes_total: 326
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.4945304963323804
          entropy_coeff: 0.009999999999999998
          kl: 0.015286611624462904
          policy_loss: 0.0900473521815406
          total_loss: 0.7632981499036153
          vf_explained_var: -0.01229284331202507
          vf_loss: 0.6879811402410269
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,108,2798.36,108000,-33.666,-24.6,-50.9,336.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-10-28_21-52-58
  done: false
  episode_len_mean: 339.34
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -33.93400000000022
  episode_reward_min: -50.90000000000045
  episodes_this_iter: 2
  episodes_total: 328
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.5011759824222988
          entropy_coeff: 0.009999999999999998
          kl: 0.009250013530867839
          policy_loss: 0.09696193867259556
          total_loss: 0.8067274332046509
          vf_explained_var: 0.0682019516825676
          vf_loss: 0.7246471728715632
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,109,2815.87,109000,-33.934,-24.6,-50.9,339.34




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-10-28_21-53-32
  done: false
  episode_len_mean: 341.31
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -34.13100000000022
  episode_reward_min: -50.90000000000045
  episodes_this_iter: 2
  episodes_total: 330
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.4247627947065564
          entropy_coeff: 0.009999999999999998
          kl: 0.008669149196134566
          policy_loss: -0.09271489547358619
          total_loss: 1.304881868428654
          vf_explained_var: -0.0972784012556076
          vf_loss: 1.4117224585678843
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,110,2849.93,110000,-34.131,-24.6,-50.9,341.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-10-28_21-53-50
  done: false
  episode_len_mean: 344.48
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -34.44800000000023
  episode_reward_min: -50.90000000000045
  episodes_this_iter: 2
  episodes_total: 332
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.325934816731347
          entropy_coeff: 0.009999999999999998
          kl: 0.010550272833693954
          policy_loss: -0.08995407687293158
          total_loss: 1.2497198588318295
          vf_explained_var: 0.12414001673460007
          vf_loss: 1.3527849304179351
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,111,2868.16,111000,-34.448,-24.6,-50.9,344.48


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-10-28_21-54-07
  done: false
  episode_len_mean: 348.07
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -34.80700000000023
  episode_reward_min: -50.90000000000045
  episodes_this_iter: 2
  episodes_total: 334
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.3701013604799905
          entropy_coeff: 0.009999999999999998
          kl: 0.0077333258507586615
          policy_loss: -0.07468778722816044
          total_loss: 1.0806770811478297
          vf_explained_var: -0.03811398521065712
          vf_loss: 1.1689571385789248
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,112,2884.44,112000,-34.807,-24.6,-50.9,348.07


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-10-28_21-54-23
  done: false
  episode_len_mean: 351.5
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -35.15000000000023
  episode_reward_min: -50.90000000000045
  episodes_this_iter: 2
  episodes_total: 336
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.3667989995744494
          entropy_coeff: 0.009999999999999998
          kl: 0.007386221345054859
          policy_loss: -0.08936198304096858
          total_loss: 1.2270420799652735
          vf_explained_var: 0.0026422678492963314
          vf_loss: 1.3299681841499276
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,113,2900.66,113000,-35.15,-24.6,-50.9,351.5


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-10-28_21-54-40
  done: false
  episode_len_mean: 355.33
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -35.53300000000024
  episode_reward_min: -51.000000000000455
  episodes_this_iter: 2
  episodes_total: 338
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.3681552463107638
          entropy_coeff: 0.009999999999999998
          kl: 0.00479524569573147
          policy_loss: -0.08474485443698035
          total_loss: 1.228894484705395
          vf_explained_var: 0.03372213616967201
          vf_loss: 1.3272534582349989
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,114,2917.39,114000,-35.533,-24.6,-51,355.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-10-28_21-54-57
  done: false
  episode_len_mean: 360.93
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -36.093000000000245
  episode_reward_min: -51.000000000000455
  episodes_this_iter: 3
  episodes_total: 341
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.4200520051850214
          entropy_coeff: 0.009999999999999998
          kl: 0.008150859923621696
          policy_loss: 0.06581075059043037
          total_loss: 1.1818854053815206
          vf_explained_var: -0.18185143172740936
          vf_loss: 1.1302178607632716
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,115,2934.26,115000,-36.093,-24.6,-51,360.93


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-10-28_21-55-13
  done: false
  episode_len_mean: 364.55
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -36.45500000000025
  episode_reward_min: -51.000000000000455
  episodes_this_iter: 2
  episodes_total: 343
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.4192258622911242
          entropy_coeff: 0.009999999999999998
          kl: 0.006951629135768735
          policy_loss: 0.10308710568481022
          total_loss: 0.7724905904796389
          vf_explained_var: -0.09086459130048752
          vf_loss: 0.6835468638791806
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,116,2950.76,116000,-36.455,-24.6,-51,364.55


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-10-28_21-55-29
  done: false
  episode_len_mean: 368.33
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -36.833000000000254
  episode_reward_min: -51.000000000000455
  episodes_this_iter: 2
  episodes_total: 345
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.383765497472551
          entropy_coeff: 0.009999999999999998
          kl: 0.007095403160058764
          policy_loss: 0.04103886153962877
          total_loss: 0.7649021360609266
          vf_explained_var: -0.23020943999290466
          vf_loss: 0.7376510519456739
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,117,2966.34,117000,-36.833,-24.6,-51,368.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-10-28_21-55-44
  done: false
  episode_len_mean: 372.98
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -37.29800000000026
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 2
  episodes_total: 347
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.366085316075219
          entropy_coeff: 0.009999999999999998
          kl: 0.006924933638693176
          policy_loss: 0.0558273701204194
          total_loss: 0.7910166177484724
          vf_explained_var: -0.13931682705879211
          vf_loss: 0.7488013845506227
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,118,2981.03,118000,-37.298,-24.6,-52.6,372.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-10-28_21-56-00
  done: false
  episode_len_mean: 377.22
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -37.72200000000026
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 2
  episodes_total: 349
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.4693721095720926
          entropy_coeff: 0.009999999999999998
          kl: 0.008766161309448359
          policy_loss: 0.05142678485976325
          total_loss: 0.798692888352606
          vf_explained_var: -0.07134505361318588
          vf_loss: 0.7618982030803131
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,119,2997.5,119000,-37.722,-24.6,-52.6,377.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-10-28_21-56-16
  done: false
  episode_len_mean: 381.43
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -38.14300000000027
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 2
  episodes_total: 351
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.4560705741246542
          entropy_coeff: 0.009999999999999998
          kl: 0.006194052814156
          policy_loss: 0.006789938857158025
          total_loss: 0.8960948480500115
          vf_explained_var: -0.020720254629850388
          vf_loss: 0.9038220776451958
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,120,3013.39,120000,-38.143,-24.6,-52.6,381.43


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-10-28_21-56-34
  done: false
  episode_len_mean: 384.25
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -38.42500000000027
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 2
  episodes_total: 353
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.520809523264567
          entropy_coeff: 0.009999999999999998
          kl: 0.011441159847118361
          policy_loss: -0.056404263112280105
          total_loss: 1.070660781694783
          vf_explained_var: -0.01507346611469984
          vf_loss: 1.1421927091355124
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,121,3031.77,121000,-38.425,-24.6,-52.6,384.25


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-10-28_21-56-54
  done: false
  episode_len_mean: 388.03
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -38.80300000000028
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 356
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.5259874131944444
          entropy_coeff: 0.009999999999999998
          kl: 0.009234460032370882
          policy_loss: 0.031145731856425603
          total_loss: 1.4532050069835452
          vf_explained_var: -0.035684213042259216
          vf_loss: 1.4372542143695883
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,122,3050.98,122000,-38.803,-24.6,-52.6,388.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-10-28_21-57-14
  done: false
  episode_len_mean: 390.96
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -39.09600000000029
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 359
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.4845589107937283
          entropy_coeff: 0.009999999999999998
          kl: 0.009766385948071053
          policy_loss: 0.08000715192821291
          total_loss: 1.0824031819899878
          vf_explained_var: -0.024751821532845497
          vf_loss: 1.0171729500922893
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,123,3071.59,123000,-39.096,-24.6,-52.6,390.96




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-10-28_21-57-55
  done: false
  episode_len_mean: 393.14
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -39.31400000000029
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 362
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.5198625405629476
          entropy_coeff: 0.009999999999999998
          kl: 0.008246225458373363
          policy_loss: 0.05145017156998317
          total_loss: 1.4630303263664246
          vf_explained_var: -0.26090899109840393
          vf_loss: 1.426720788743761
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,124,3111.94,124000,-39.314,-24.6,-52.6,393.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-10-28_21-58-17
  done: false
  episode_len_mean: 395.44
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -39.54400000000029
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 365
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.4540424744288127
          entropy_coeff: 0.009999999999999998
          kl: 0.012442685452664855
          policy_loss: 0.07133871946069929
          total_loss: 1.0567794064680736
          vf_explained_var: -0.022054024040699005
          vf_loss: 0.9998936163054573
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,125,3134.04,125000,-39.544,-24.6,-52.6,395.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-10-28_21-58-40
  done: false
  episode_len_mean: 397.03
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -39.703000000000294
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 368
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.4452722152074178
          entropy_coeff: 0.009999999999999998
          kl: 0.006851932221554572
          policy_loss: 0.08485050085518095
          total_loss: 0.9930821494923697
          vf_explained_var: -0.03301136940717697
          vf_loss: 0.9226361817783779
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,126,3157.11,126000,-39.703,-24.6,-52.6,397.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-10-28_21-59-03
  done: false
  episode_len_mean: 398.18
  episode_media: {}
  episode_reward_max: -28.000000000000128
  episode_reward_mean: -39.81800000000029
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 371
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.3776885006162856
          entropy_coeff: 0.009999999999999998
          kl: 0.014628489148027887
          policy_loss: 0.03483821368879742
          total_loss: 1.4706106308433744
          vf_explained_var: 0.0008448289590887725
          vf_loss: 1.449446436845594
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,127,3180.74,127000,-39.818,-28,-52.6,398.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-10-28_21-59-29
  done: false
  episode_len_mean: 398.15
  episode_media: {}
  episode_reward_max: -27.100000000000115
  episode_reward_mean: -39.81500000000029
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 374
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.334058928489685
          entropy_coeff: 0.009999999999999998
          kl: 0.007883927126551725
          policy_loss: -0.1038041525416904
          total_loss: 1.8652921040852866
          vf_explained_var: 0.038532935082912445
          vf_loss: 1.9823814206653172
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,128,3206.08,128000,-39.815,-27.1,-52.6,398.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-10-28_21-59-55
  done: false
  episode_len_mean: 397.31
  episode_media: {}
  episode_reward_max: -26.200000000000102
  episode_reward_mean: -39.73100000000029
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 4
  episodes_total: 378
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.2831128928396436
          entropy_coeff: 0.009999999999999998
          kl: 0.005670280435135627
          policy_loss: 0.01677210380633672
          total_loss: 1.7466549648178948
          vf_explained_var: 0.04457005858421326
          vf_loss: 1.7426741348372565
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,129,3231.86,129000,-39.731,-26.2,-52.6,397.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-10-28_22-00-19
  done: false
  episode_len_mean: 395.51
  episode_media: {}
  episode_reward_max: -25.900000000000098
  episode_reward_mean: -39.55100000000029
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 4
  episodes_total: 382
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 1.301213056511349
          entropy_coeff: 0.009999999999999998
          kl: 0.039057783422576146
          policy_loss: 0.02769445077412658
          total_loss: 1.3801363097296822
          vf_explained_var: 0.3691636025905609
          vf_loss: 1.3651793658733369
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,130,3256.54,130000,-39.551,-25.9,-52.6,395.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-10-28_22-00-46
  done: false
  episode_len_mean: 393.57
  episode_media: {}
  episode_reward_max: -25.800000000000097
  episode_reward_mean: -39.35700000000029
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 4
  episodes_total: 386
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010546874999999997
          cur_lr: 5.000000000000001e-05
          entropy: 1.445606177382999
          entropy_coeff: 0.009999999999999998
          kl: 0.03749410550825046
          policy_loss: -0.02343292964829339
          total_loss: 1.6279252277480232
          vf_explained_var: 0.30642926692962646
          vf_loss: 1.6654187732272678
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,131,3283.64,131000,-39.357,-25.8,-52.6,393.57


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-10-28_22-01-13
  done: false
  episode_len_mean: 391.12
  episode_media: {}
  episode_reward_max: -25.40000000000009
  episode_reward_mean: -39.112000000000286
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 389
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4229677425490486
          entropy_coeff: 0.009999999999999998
          kl: 0.013304373628697085
          policy_loss: -0.05124029856589105
          total_loss: 0.9887455297840966
          vf_explained_var: 0.5445629358291626
          vf_loss: 1.0540050208568572
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,132,3310.61,132000,-39.112,-25.4,-52.6,391.12




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-10-28_22-02-00
  done: false
  episode_len_mean: 387.5
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -38.750000000000284
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 4
  episodes_total: 393
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2869820766978795
          entropy_coeff: 0.009999999999999998
          kl: 0.019956439932700357
          policy_loss: -0.05679650521940655
          total_loss: 1.2229458457893796
          vf_explained_var: 0.28997689485549927
          vf_loss: 1.2922964460319943
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,133,3357.04,133000,-38.75,-24.8,-52.6,387.5


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-10-28_22-02-27
  done: false
  episode_len_mean: 383.13
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -38.31300000000027
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 4
  episodes_total: 397
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3699379258685642
          entropy_coeff: 0.009999999999999998
          kl: 0.01741808706941295
          policy_loss: 0.0237647860414452
          total_loss: 1.2449015726645787
          vf_explained_var: 0.5095900297164917
          vf_loss: 1.2345606211158964
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,134,3384.29,134000,-38.313,-24.8,-52.6,383.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-10-28_22-02-54
  done: false
  episode_len_mean: 379.54
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -37.95400000000027
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 4
  episodes_total: 401
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.4071073955959743
          entropy_coeff: 0.009999999999999998
          kl: 0.01635349622620799
          policy_loss: 0.030301563772890304
          total_loss: 1.1650327192412482
          vf_explained_var: 0.49509960412979126
          vf_loss: 1.1485435240798527
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,135,3411.49,135000,-37.954,-24.8,-52.6,379.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-10-28_22-03-21
  done: false
  episode_len_mean: 373.86
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -37.386000000000266
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 4
  episodes_total: 405
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.3829334404733447
          entropy_coeff: 0.009999999999999998
          kl: 0.009478784533823791
          policy_loss: 0.03528033287988769
          total_loss: 0.37919165848029984
          vf_explained_var: 0.9113125205039978
          vf_loss: 0.3575907020105256
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,136,3438.08,136000,-37.386,-24.8,-52.6,373.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-10-28_22-03-49
  done: false
  episode_len_mean: 368.31
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -36.83100000000026
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 408
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0158203125
          cur_lr: 5.000000000000001e-05
          entropy: 1.2292865541246203
          entropy_coeff: 0.009999999999999998
          kl: 0.028457591805182068
          policy_loss: 0.05027722186512417
          total_loss: 0.91309215856923
          vf_explained_var: 0.8993532061576843
          vf_loss: 0.8746576027737724
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,137,3465.65,137000,-36.831,-24.8,-52.6,368.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-10-28_22-04-14
  done: false
  episode_len_mean: 362.8
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -36.28000000000025
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 4
  episodes_total: 412
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3987440493371752
          entropy_coeff: 0.009999999999999998
          kl: 0.014112995885681833
          policy_loss: -0.01286860207716624
          total_loss: 1.6407985117700365
          vf_explained_var: 0.6043045520782471
          vf_loss: 1.667319643497467
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,138,3491.15,138000,-36.28,-24.8,-52.6,362.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-10-28_22-04-39
  done: false
  episode_len_mean: 356.04
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -35.60400000000024
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 4
  episodes_total: 416
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3215106315082974
          entropy_coeff: 0.009999999999999998
          kl: 0.008471010683700481
          policy_loss: 0.029586703495846856
          total_loss: 0.8214579618639416
          vf_explained_var: 0.7015839219093323
          vf_loss: 0.8048853387435277
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,139,3516.56,139000,-35.604,-24.8,-52.6,356.04


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-10-28_22-05-05
  done: false
  episode_len_mean: 351.51
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -35.151000000000224
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 419
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4116574313905503
          entropy_coeff: 0.009999999999999998
          kl: 0.008722544324236579
          policy_loss: -0.00434177964925766
          total_loss: 0.791520803173383
          vf_explained_var: 0.6307913064956665
          vf_loss: 0.8097721729013655
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,140,3541.66,140000,-35.151,-24.8,-52.6,351.51




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-10-28_22-05-45
  done: false
  episode_len_mean: 347.03
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -34.70300000000022
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 422
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5297083099683126
          entropy_coeff: 0.009999999999999998
          kl: 0.010343845895267812
          policy_loss: -0.10843932678302129
          total_loss: 1.3705709987216526
          vf_explained_var: 0.2516462206840515
          vf_loss: 1.4940619495179919
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,141,3582.1,141000,-34.703,-24.8,-52.6,347.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-10-28_22-06-08
  done: false
  episode_len_mean: 341.89
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -34.18900000000021
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 425
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5559021989504496
          entropy_coeff: 0.009999999999999998
          kl: 0.013616737435403476
          policy_loss: -0.10431074839499262
          total_loss: 1.4318628893958198
          vf_explained_var: 0.11990280449390411
          vf_loss: 1.5514095240169101
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,142,3605.38,142000,-34.189,-24.8,-52.6,341.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-10-28_22-06-32
  done: false
  episode_len_mean: 335.99
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -33.5990000000002
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 4
  episodes_total: 429
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.614038089911143
          entropy_coeff: 0.009999999999999998
          kl: 0.01131460297342836
          policy_loss: 0.05638201932112376
          total_loss: 1.1503185212612153
          vf_explained_var: 0.18093609809875488
          vf_loss: 1.1098083860344357
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,143,3629.35,143000,-33.599,-24.8,-52.6,335.99


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-10-28_22-06-57
  done: false
  episode_len_mean: 331.97
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -33.1970000000002
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 432
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.5679687751664055
          entropy_coeff: 0.009999999999999998
          kl: 0.009046459687389824
          policy_loss: 0.07096377147568597
          total_loss: 0.8944787558582094
          vf_explained_var: 0.3403140902519226
          vf_loss: 0.8389799928706553
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,144,3653.49,144000,-33.197,-24.8,-52.6,331.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-10-28_22-07-23
  done: false
  episode_len_mean: 323.74
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -32.37400000000019
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 4
  episodes_total: 436
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3435532516903348
          entropy_coeff: 0.009999999999999998
          kl: 0.01679939385508757
          policy_loss: -0.0006161086675193575
          total_loss: 1.2720075461599563
          vf_explained_var: 0.16599994897842407
          vf_loss: 1.285660539733039
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,145,3679.72,145000,-32.374,-24.8,-52.6,323.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-10-28_22-07-49
  done: false
  episode_len_mean: 317.74
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -31.774000000000182
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 3
  episodes_total: 439
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4597927451133728
          entropy_coeff: 0.009999999999999998
          kl: 0.011580847764196593
          policy_loss: -0.01124071114593082
          total_loss: 0.8036510686079661
          vf_explained_var: 0.36238914728164673
          vf_loss: 0.8292148927847545
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,146,3705.52,146000,-31.774,-24.8,-52.6,317.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-10-28_22-08-13
  done: false
  episode_len_mean: 310.64
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -31.06400000000017
  episode_reward_min: -52.60000000000048
  episodes_this_iter: 4
  episodes_total: 443
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4896532389852735
          entropy_coeff: 0.009999999999999998
          kl: 0.013870509303491079
          policy_loss: -0.00980621369348632
          total_loss: 1.626329571670956
          vf_explained_var: 0.15779712796211243
          vf_loss: 1.6507031811608208
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,147,3730.03,147000,-31.064,-24.8,-52.6,310.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-10-28_22-08-39
  done: false
  episode_len_mean: 304.42
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -30.442000000000164
  episode_reward_min: -51.800000000000466
  episodes_this_iter: 3
  episodes_total: 446
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.3749467651049296
          entropy_coeff: 0.009999999999999998
          kl: 0.005871452850340934
          policy_loss: -0.007061768405967289
          total_loss: 0.9751034097539054
          vf_explained_var: 0.2362411916255951
          vf_loss: 0.9957753098673291
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,148,3755.7,148000,-30.442,-24.8,-51.8,304.42




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-10-28_22-09-20
  done: false
  episode_len_mean: 296.2
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -29.62000000000015
  episode_reward_min: -51.800000000000466
  episodes_this_iter: 4
  episodes_total: 450
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02373046875000001
          cur_lr: 5.000000000000001e-05
          entropy: 1.4183578054110209
          entropy_coeff: 0.009999999999999998
          kl: 0.02099824352429971
          policy_loss: 0.030069322221808963
          total_loss: 1.6285646941926744
          vf_explained_var: 0.0790877714753151
          vf_loss: 1.612180667453342
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,149,3797.25,149000,-29.62,-24.8,-51.8,296.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-10-28_22-09-46
  done: false
  episode_len_mean: 292.12
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -29.21200000000015
  episode_reward_min: -43.200000000000344
  episodes_this_iter: 3
  episodes_total: 453
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.352693948480818
          entropy_coeff: 0.009999999999999998
          kl: 0.0117066562447392
          policy_loss: 0.04675479274657038
          total_loss: 1.1596337993939718
          vf_explained_var: 0.1588527411222458
          vf_loss: 1.1259892248445087
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,150,3822.6,150000,-29.212,-24.8,-43.2,292.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-10-28_22-10-12
  done: false
  episode_len_mean: 287.9
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -28.79000000000014
  episode_reward_min: -38.300000000000274
  episodes_this_iter: 4
  episodes_total: 457
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3239024056328668
          entropy_coeff: 0.009999999999999998
          kl: 0.011389721199269844
          policy_loss: -0.011957168993022706
          total_loss: 1.6008041924900478
          vf_explained_var: 0.054460473358631134
          vf_loss: 1.6255949629677666
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 151000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,151,3848.58,151000,-28.79,-24.8,-38.3,287.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-10-28_22-10-39
  done: false
  episode_len_mean: 285.76
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -28.576000000000132
  episode_reward_min: -35.80000000000024
  episodes_this_iter: 3
  episodes_total: 460
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.1974464615186056
          entropy_coeff: 0.009999999999999998
          kl: 0.005370319568896144
          policy_loss: -0.019509187175167933
          total_loss: 1.0647079514132605
          vf_explained_var: 0.09653226286172867
          vf_loss: 1.096000443233384
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,152,3876.07,152000,-28.576,-24.8,-35.8,285.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-10-28_22-11-07
  done: false
  episode_len_mean: 282.67
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -28.26700000000013
  episode_reward_min: -35.80000000000024
  episodes_this_iter: 4
  episodes_total: 464
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.275118929809994
          entropy_coeff: 0.009999999999999998
          kl: 0.012161506360646474
          policy_loss: 0.03170428582363659
          total_loss: 1.243099049727122
          vf_explained_var: 0.25798550248146057
          vf_loss: 1.2237130423386893
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,153,3903.6,153000,-28.267,-24.8,-35.8,282.67


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-10-28_22-11-34
  done: false
  episode_len_mean: 280.32
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -28.032000000000135
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 4
  episodes_total: 468
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3124155667093065
          entropy_coeff: 0.009999999999999998
          kl: 0.008275090195934708
          policy_loss: -0.01227447423670027
          total_loss: 1.1455897437201605
          vf_explained_var: 0.35735610127449036
          vf_loss: 1.1706938101185693
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,154,3930.56,154000,-28.032,-24.8,-34.9,280.32


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-10-28_22-11-59
  done: false
  episode_len_mean: 279.8
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -27.980000000000135
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 3
  episodes_total: 471
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3021884666548835
          entropy_coeff: 0.009999999999999998
          kl: 0.006639342627085022
          policy_loss: 0.08520439333385892
          total_loss: 0.9669514748785231
          vf_explained_var: 0.4874872863292694
          vf_loss: 0.8945326449970404
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,155,3955.39,155000,-27.98,-24.8,-34.9,279.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-10-28_22-12-24
  done: false
  episode_len_mean: 279.92
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -27.99200000000012
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 3
  episodes_total: 474
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.116340070300632
          entropy_coeff: 0.009999999999999998
          kl: 0.01254219270111073
          policy_loss: -0.07801270153787401
          total_loss: 1.4336131387286717
          vf_explained_var: 0.18607811629772186
          vf_loss: 1.5223427838749355
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 156000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,156,3980.83,156000,-27.992,-24.8,-34.9,279.92


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-10-28_22-12-52
  done: false
  episode_len_mean: 279.63
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -27.96300000000013
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 4
  episodes_total: 478
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.0092649870448642
          entropy_coeff: 0.009999999999999998
          kl: 0.010471887519557733
          policy_loss: 0.016269440121120877
          total_loss: 1.213512231906255
          vf_explained_var: 0.17068243026733398
          vf_loss: 1.2069626741939121
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,157,4008.25,157000,-27.963,-24.8,-34.9,279.63




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-10-28_22-13-38
  done: false
  episode_len_mean: 279.11
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.911000000000126
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 4
  episodes_total: 482
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.0664793034394582
          entropy_coeff: 0.009999999999999998
          kl: 0.009735353015257135
          policy_loss: -0.00899562860528628
          total_loss: 1.091365216175715
          vf_explained_var: 0.4884348213672638
          vf_loss: 1.1106791079044342
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 158000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,158,4054.23,158000,-27.911,-23.2,-34.9,279.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-10-28_22-14-02
  done: false
  episode_len_mean: 279.42
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.942000000000125
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 4
  episodes_total: 486
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.0207167214817472
          entropy_coeff: 0.009999999999999998
          kl: 0.006509967814703119
          policy_loss: -0.012030813760227627
          total_loss: 1.3992288019922046
          vf_explained_var: 0.3089653253555298
          vf_loss: 1.4212350911564298
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 159000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,159,4078.85,159000,-27.942,-23.2,-34.9,279.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-10-28_22-14-35
  done: false
  episode_len_mean: 279.61
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.961000000000126
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 4
  episodes_total: 490
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 0.9277077681488461
          entropy_coeff: 0.009999999999999998
          kl: 0.005645975972096827
          policy_loss: 0.0027663009862105054
          total_loss: 1.2046759088834127
          vf_explained_var: 0.4477129280567169
          vf_loss: 1.2109856989648606
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,160,4111.84,160000,-27.961,-23.2,-34.9,279.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-10-28_22-15-03
  done: false
  episode_len_mean: 279.71
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.971000000000128
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 3
  episodes_total: 493
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03559570312499999
          cur_lr: 5.000000000000001e-05
          entropy: 1.00173030561871
          entropy_coeff: 0.009999999999999998
          kl: 0.0025806549065574163
          policy_loss: -0.010708426353004244
          total_loss: 1.238050675392151
          vf_explained_var: 0.07790021598339081
          vf_loss: 1.2586845464176601
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 161000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,161,4139.28,161000,-27.971,-23.2,-34.9,279.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-10-28_22-15-30
  done: false
  episode_len_mean: 279.68
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.968000000000124
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 4
  episodes_total: 497
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 0.9457136233647664
          entropy_coeff: 0.009999999999999998
          kl: 0.008486628837429006
          policy_loss: -0.019027501179112328
          total_loss: 0.8888173119889365
          vf_explained_var: 0.5501078367233276
          vf_loss: 0.9171509062250455
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 162000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,162,4166.51,162000,-27.968,-23.2,-34.9,279.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-10-28_22-15-59
  done: false
  episode_len_mean: 278.97
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.89700000000013
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 4
  episodes_total: 501
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 0.8776634150081211
          entropy_coeff: 0.009999999999999998
          kl: 0.011745722986976799
          policy_loss: -0.055966343233982724
          total_loss: 0.4714088016086154
          vf_explained_var: 0.8015578985214233
          vf_loss: 0.5359427369303174
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,163,4195.75,163000,-27.897,-23.2,-34.9,278.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-10-28_22-16-25
  done: false
  episode_len_mean: 278.88
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.888000000000126
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 4
  episodes_total: 505
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.017797851562499994
          cur_lr: 5.000000000000001e-05
          entropy: 0.9640680962138706
          entropy_coeff: 0.009999999999999998
          kl: 0.039217800901787274
          policy_loss: 0.1342634520596928
          total_loss: 1.7230347454547883
          vf_explained_var: 0.5726137161254883
          vf_loss: 1.597714004251692
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,164,4221.99,164000,-27.888,-23.2,-34.9,278.88


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-10-28_22-16-53
  done: false
  episode_len_mean: 278.67
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.867000000000125
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 4
  episodes_total: 509
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 0.9928106009960175
          entropy_coeff: 0.009999999999999998
          kl: 0.00920005643838427
          policy_loss: -0.009358315997653537
          total_loss: 0.6780053403642442
          vf_explained_var: 0.7547175288200378
          vf_loss: 0.6970461378494899
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,165,4249.82,165000,-27.867,-23.2,-34.9,278.67




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-10-28_22-17-33
  done: false
  episode_len_mean: 279.7
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -27.970000000000123
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 3
  episodes_total: 512
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1834490617116293
          entropy_coeff: 0.009999999999999998
          kl: 0.007254692643170991
          policy_loss: 0.05079825437731213
          total_loss: 1.1106171909305784
          vf_explained_var: 0.24723269045352936
          vf_loss: 1.0714597496307559
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,166,4289.2,166000,-27.97,-23.2,-34.9,279.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-10-28_22-17-57
  done: false
  episode_len_mean: 281.53
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.153000000000123
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 3
  episodes_total: 515
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2271063526471455
          entropy_coeff: 0.009999999999999998
          kl: 0.013368612303959537
          policy_loss: 0.06408117504583465
          total_loss: 0.8323161588774787
          vf_explained_var: 0.2076347917318344
          vf_loss: 0.7801491561863158
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,167,4313.07,167000,-28.153,-23.2,-34.9,281.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-10-28_22-18-18
  done: false
  episode_len_mean: 282.52
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.252000000000137
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 3
  episodes_total: 518
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 1.193715849187639
          entropy_coeff: 0.009999999999999998
          kl: 0.009772996566289688
          policy_loss: -0.06490289713773463
          total_loss: 1.1919547617435455
          vf_explained_var: 0.3532863259315491
          vf_loss: 1.2685339166058434
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,168,4334.41,168000,-28.252,-23.2,-34.9,282.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-10-28_22-18-41
  done: false
  episode_len_mean: 283.83
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.38300000000013
  episode_reward_min: -38.90000000000028
  episodes_this_iter: 3
  episodes_total: 521
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 1.178144007258945
          entropy_coeff: 0.009999999999999998
          kl: 0.007359454945816779
          policy_loss: -0.08914957609441546
          total_loss: 1.4884442620807223
          vf_explained_var: 0.2613014280796051
          vf_loss: 1.589178811179267
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,169,4357.84,169000,-28.383,-23.2,-38.9,283.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-10-28_22-19-05
  done: false
  episode_len_mean: 282.94
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.29400000000013
  episode_reward_min: -38.90000000000028
  episodes_this_iter: 4
  episodes_total: 525
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02669677734375
          cur_lr: 5.000000000000001e-05
          entropy: 1.0578697946336535
          entropy_coeff: 0.009999999999999998
          kl: 0.002916566581938203
          policy_loss: 0.05831274547510677
          total_loss: 1.2554070499208239
          vf_explained_var: 0.3645351827144623
          vf_loss: 1.2075951556364695
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,170,4381.33,170000,-28.294,-23.2,-38.9,282.94


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-10-28_22-19-28
  done: false
  episode_len_mean: 283.09
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.30900000000013
  episode_reward_min: -38.90000000000028
  episodes_this_iter: 3
  episodes_total: 528
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.013348388671875
          cur_lr: 5.000000000000001e-05
          entropy: 1.2074505521191492
          entropy_coeff: 0.009999999999999998
          kl: 0.004304620998217166
          policy_loss: 0.0799685618115796
          total_loss: 1.1016572190655602
          vf_explained_var: 0.336355984210968
          vf_loss: 1.033705707680848
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,171,4404.76,171000,-28.309,-23.2,-38.9,283.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-10-28_22-19-49
  done: false
  episode_len_mean: 284.48
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.448000000000143
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 531
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0066741943359375
          cur_lr: 5.000000000000001e-05
          entropy: 1.252164351940155
          entropy_coeff: 0.009999999999999998
          kl: 0.0109746451702924
          policy_loss: 0.13231607890791364
          total_loss: 0.7991059680779775
          vf_explained_var: 0.16991356015205383
          vf_loss: 0.679238281523188
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,172,4425.73,172000,-28.448,-23.2,-39,284.48


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-10-28_22-20-11
  done: false
  episode_len_mean: 285.12
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.51200000000014
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 2
  episodes_total: 533
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0066741943359375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2629548867543539
          entropy_coeff: 0.009999999999999998
          kl: 0.011771512530809798
          policy_loss: -0.07550003818339772
          total_loss: 1.3142846286296845
          vf_explained_var: 0.054233647882938385
          vf_loss: 1.402335649066501
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 173000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,173,4447.16,173000,-28.512,-23.2,-39,285.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-10-28_22-20-34
  done: false
  episode_len_mean: 286.95
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.695000000000142
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 537
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0066741943359375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1494477073351541
          entropy_coeff: 0.009999999999999998
          kl: 0.005982214059963158
          policy_loss: 0.03766225609514448
          total_loss: 1.6463349143664041
          vf_explained_var: 0.050110090523958206
          vf_loss: 1.6201272130012512
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,174,4470.41,174000,-28.695,-23.2,-39,286.95




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-10-28_22-21-18
  done: false
  episode_len_mean: 286.06
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.606000000000137
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 541
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0066741943359375
          cur_lr: 5.000000000000001e-05
          entropy: 1.0668840865294138
          entropy_coeff: 0.009999999999999998
          kl: 0.008223610388073609
          policy_loss: -0.015116182549132242
          total_loss: 1.6214987595876058
          vf_explained_var: 0.10287333279848099
          vf_loss: 1.64722890191608
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,175,4514.61,175000,-28.606,-23.2,-39,286.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-10-28_22-21-45
  done: false
  episode_len_mean: 285.34
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.534000000000137
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 544
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0066741943359375
          cur_lr: 5.000000000000001e-05
          entropy: 0.7956090609232584
          entropy_coeff: 0.009999999999999998
          kl: 0.0049402018729292905
          policy_loss: -0.0785466656088829
          total_loss: 1.2445809986856249
          vf_explained_var: 0.23171043395996094
          vf_loss: 1.3310507893562318
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,176,4541.18,176000,-28.534,-23.2,-39,285.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-10-28_22-22-09
  done: false
  episode_len_mean: 286.18
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.618000000000134
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 548
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00333709716796875
          cur_lr: 5.000000000000001e-05
          entropy: 1.0073731217119428
          entropy_coeff: 0.009999999999999998
          kl: 0.009011375421074755
          policy_loss: 0.010870368530352911
          total_loss: 1.7292358928256564
          vf_explained_var: 0.08932068943977356
          vf_loss: 1.7284091777271695
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 177000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,177,4564.93,177000,-28.618,-23.2,-39,286.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-10-28_22-22-36
  done: false
  episode_len_mean: 285.77
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.577000000000133
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 551
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00333709716796875
          cur_lr: 5.000000000000001e-05
          entropy: 0.8852457033263312
          entropy_coeff: 0.009999999999999998
          kl: 0.009094431114290952
          policy_loss: -0.04471357671750916
          total_loss: 1.2499687049123975
          vf_explained_var: 0.02864585444331169
          vf_loss: 1.3035043941603766
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 178000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,178,4592.07,178000,-28.577,-23.2,-39,285.77


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-10-28_22-23-00
  done: false
  episode_len_mean: 284.51
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.451000000000132
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 555
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00333709716796875
          cur_lr: 5.000000000000001e-05
          entropy: 0.8786815100246006
          entropy_coeff: 0.009999999999999998
          kl: 0.009147522403830127
          policy_loss: 0.013923786911699506
          total_loss: 1.5753054777781168
          vf_explained_var: 0.09997525811195374
          vf_loss: 1.5701379934946695
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,179,4616.56,179000,-28.451,-23.2,-39,284.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-10-28_22-23-29
  done: false
  episode_len_mean: 283.66
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.36600000000013
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 559
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00333709716796875
          cur_lr: 5.000000000000001e-05
          entropy: 0.7836745195918613
          entropy_coeff: 0.009999999999999998
          kl: 0.006473696865738848
          policy_loss: 0.0026833408408694797
          total_loss: 1.4426414410273234
          vf_explained_var: 0.16916587948799133
          vf_loss: 1.447773241996765
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,180,4644.88,180000,-28.366,-23.2,-39,283.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-10-28_22-23-57
  done: false
  episode_len_mean: 282.7
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.270000000000138
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 563
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00333709716796875
          cur_lr: 5.000000000000001e-05
          entropy: 0.654450723528862
          entropy_coeff: 0.009999999999999998
          kl: 0.057665828456375065
          policy_loss: 0.033529051765799525
          total_loss: 1.1028582387500339
          vf_explained_var: 0.3984348177909851
          vf_loss: 1.0756812473138173
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,181,4673.54,181000,-28.27,-23.2,-39,282.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-10-28_22-24-26
  done: false
  episode_len_mean: 281.54
  episode_media: {}
  episode_reward_max: -23.20000000000006
  episode_reward_mean: -28.15400000000013
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 567
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005005645751953126
          cur_lr: 5.000000000000001e-05
          entropy: 0.5784509135617151
          entropy_coeff: 0.009999999999999998
          kl: 0.0047652476979376645
          policy_loss: 0.03983547869655821
          total_loss: 1.0279278185632494
          vf_explained_var: 0.507790207862854
          vf_loss: 0.9938529895411597
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,182,4702.5,182000,-28.154,-23.2,-39,281.54




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-10-28_22-25-14
  done: false
  episode_len_mean: 279.49
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.949000000000133
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 571
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 0.6501347356372409
          entropy_coeff: 0.009999999999999998
          kl: 0.015479267016911694
          policy_loss: -0.04178032928870784
          total_loss: 0.9102720085117552
          vf_explained_var: 0.5812757015228271
          vf_loss: 0.9585149407386779
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,183,4750.58,183000,-27.949,-21.1,-39,279.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-10-28_22-25-44
  done: false
  episode_len_mean: 277.08
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.70800000000013
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 5
  episodes_total: 576
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 0.6389384865760803
          entropy_coeff: 0.009999999999999998
          kl: 0.017104171293137918
          policy_loss: 0.0011862665000889037
          total_loss: 1.059883842865626
          vf_explained_var: 0.7297993898391724
          vf_loss: 1.065044151412116
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,184,4780.56,184000,-27.708,-21.1,-39,277.08


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-10-28_22-26-12
  done: false
  episode_len_mean: 277.19
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.71900000000013
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 579
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 0.8046581248442332
          entropy_coeff: 0.009999999999999998
          kl: 0.00924931830031331
          policy_loss: 0.02337813658846749
          total_loss: 0.7431981119844648
          vf_explained_var: 0.7854723334312439
          vf_loss: 0.7278434144126045
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 185000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,185,4808.5,185000,-27.719,-21.1,-39,277.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-10-28_22-26-40
  done: false
  episode_len_mean: 276.94
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.69400000000012
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 583
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 0.7958635555373298
          entropy_coeff: 0.009999999999999998
          kl: 0.00689173925613527
          policy_loss: 0.034591228225164944
          total_loss: 0.6815330319934421
          vf_explained_var: 0.6965887546539307
          vf_loss: 0.6548831866847145
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 186000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,186,4836.45,186000,-27.694,-21.1,-39,276.94


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-10-28_22-27-08
  done: false
  episode_len_mean: 276.28
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.628000000000124
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 587
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 0.7865736583868662
          entropy_coeff: 0.009999999999999998
          kl: 0.008854477899079269
          policy_loss: -0.04594624721341663
          total_loss: 0.9001395394404729
          vf_explained_var: 0.6678404211997986
          vf_loss: 0.9539293640189701
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,187,4863.87,187000,-27.628,-21.1,-39,276.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-10-28_22-27-29
  done: false
  episode_len_mean: 278.19
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.819000000000127
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 3
  episodes_total: 590
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 1.0890026926994323
          entropy_coeff: 0.009999999999999998
          kl: 0.009403712296394091
          policy_loss: -0.04125367452700933
          total_loss: 0.6431984042127927
          vf_explained_var: 0.40355750918388367
          vf_loss: 0.6953185570322805
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 188000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,188,4885,188000,-27.819,-21.1,-39,278.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-10-28_22-27-55
  done: false
  episode_len_mean: 278.46
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.84600000000012
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 594
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 0.8990407890743679
          entropy_coeff: 0.009999999999999998
          kl: 0.013893173247994599
          policy_loss: 0.015427888764275446
          total_loss: 1.0903213633431328
          vf_explained_var: 0.5227118730545044
          vf_loss: 1.0838491045766407
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,189,4910.94,189000,-27.846,-21.1,-39,278.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-10-28_22-28-16
  done: false
  episode_len_mean: 280.8
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -28.080000000000126
  episode_reward_min: -39.50000000000029
  episodes_this_iter: 3
  episodes_total: 597
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 1.0190729823377398
          entropy_coeff: 0.009999999999999998
          kl: 0.011210091029949017
          policy_loss: 0.06615114981929461
          total_loss: 0.9448159651623832
          vf_explained_var: -0.097284235060215
          vf_loss: 0.8888274985055129
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,190,4931.96,190000,-28.08,-21.1,-39.5,280.8




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-10-28_22-28-56
  done: false
  episode_len_mean: 281.94
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -28.194000000000138
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 3
  episodes_total: 600
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 0.7734509719742669
          entropy_coeff: 0.009999999999999998
          kl: 0.009473562782176417
          policy_loss: -0.0781173985865381
          total_loss: 1.037691103087531
          vf_explained_var: 0.010542151518166065
          vf_loss: 1.1235193196270201
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 191000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,191,4971.98,191000,-28.194,-21.1,-41.1,281.94


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-10-28_22-29-24
  done: false
  episode_len_mean: 282.9
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -28.290000000000138
  episode_reward_min: -41.100000000000314
  episodes_this_iter: 4
  episodes_total: 604
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 0.8136290609836578
          entropy_coeff: 0.009999999999999998
          kl: 0.01930648533762217
          policy_loss: -0.03978061543570625
          total_loss: 1.2195246107048459
          vf_explained_var: 0.261535108089447
          vf_loss: 1.2673931890063816
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,192,4999.91,192000,-28.29,-21.1,-41.1,282.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-10-28_22-29-46
  done: false
  episode_len_mean: 285.01
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -28.501000000000136
  episode_reward_min: -41.70000000000032
  episodes_this_iter: 3
  episodes_total: 607
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 1.0731629769007365
          entropy_coeff: 0.009999999999999998
          kl: 0.009048045209997567
          policy_loss: -0.004001609484354655
          total_loss: 1.0476696564091577
          vf_explained_var: 0.017585797235369682
          vf_loss: 1.0623802524473933
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 193000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,193,5021.98,193000,-28.501,-21.1,-41.7,285.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-10-28_22-30-08
  done: false
  episode_len_mean: 287.54
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -28.754000000000136
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 610
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 0.8357133977942997
          entropy_coeff: 0.009999999999999998
          kl: 0.01187250149023116
          policy_loss: 0.09057098536027802
          total_loss: 0.7667597095171611
          vf_explained_var: 0.0742122009396553
          vf_loss: 0.684516139059431
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,194,5044.11,194000,-28.754,-21.1,-45.2,287.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-10-28_22-30-31
  done: false
  episode_len_mean: 286.79
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -28.679000000000137
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 613
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 0.9633942094114092
          entropy_coeff: 0.009999999999999998
          kl: 0.0065724735740082234
          policy_loss: 0.0728107756210698
          total_loss: 0.8601545655065113
          vf_explained_var: 0.28225424885749817
          vf_loss: 0.7969612797101339
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 195000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,195,5066.99,195000,-28.679,-21.1,-45.2,286.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-10-28_22-30-58
  done: false
  episode_len_mean: 285.01
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -28.501000000000136
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 617
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 0.8757025480270386
          entropy_coeff: 0.009999999999999998
          kl: 0.013545173669955684
          policy_loss: -0.07827066944705116
          total_loss: 1.2830213162634108
          vf_explained_var: 0.5239158868789673
          vf_loss: 1.3700151138835484
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,196,5094.04,196000,-28.501,-21.1,-45.2,285.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-10-28_22-31-23
  done: false
  episode_len_mean: 284.19
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -28.419000000000132
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 620
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 1.0299867775705125
          entropy_coeff: 0.009999999999999998
          kl: 0.008291815586752212
          policy_loss: 0.07318577352497313
          total_loss: 1.2179087069299486
          vf_explained_var: 0.094568632543087
          vf_loss: 1.1550020509295993
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 197000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,197,5118.74,197000,-28.419,-21.1,-45.2,284.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-10-28_22-31-49
  done: false
  episode_len_mean: 282.54
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -28.25400000000013
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 624
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002502822875976563
          cur_lr: 5.000000000000001e-05
          entropy: 0.8568729374143812
          entropy_coeff: 0.009999999999999998
          kl: 0.00485025668248961
          policy_loss: 0.019055890209145016
          total_loss: 0.4860804107454088
          vf_explained_var: 0.8232036232948303
          vf_loss: 0.47558111432525846
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 198000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,198,5145.1,198000,-28.254,-21.1,-45.2,282.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-10-28_22-32-17
  done: false
  episode_len_mean: 279.87
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.987000000000126
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 628
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0012514114379882815
          cur_lr: 5.000000000000001e-05
          entropy: 0.9511358539263407
          entropy_coeff: 0.009999999999999998
          kl: 0.012247033719302497
          policy_loss: -0.0019086806724468866
          total_loss: 0.8205401791466607
          vf_explained_var: 0.5141657590866089
          vf_loss: 0.8319448838631313
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,199,5172.98,199000,-27.987,-21.1,-45.2,279.87




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-10-28_22-33-01
  done: false
  episode_len_mean: 278.17
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.81700000000013
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 631
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0012514114379882815
          cur_lr: 5.000000000000001e-05
          entropy: 1.152874779038959
          entropy_coeff: 0.009999999999999998
          kl: 0.03027294312661534
          policy_loss: -0.07591564779480299
          total_loss: 1.5475063860416411
          vf_explained_var: 0.1844785064458847
          vf_loss: 1.6349128819174237
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,200,5216.42,200000,-27.817,-21.1,-45.2,278.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-10-28_22-33-25
  done: false
  episode_len_mean: 276.53
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.65300000000012
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 635
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 1.1796620607376098
          entropy_coeff: 0.009999999999999998
          kl: 0.013277617868872977
          policy_loss: -0.0036061101903518042
          total_loss: 0.8416756090190676
          vf_explained_var: 0.58968186378479
          vf_loss: 0.8570534163051181
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,201,5240.4,201000,-27.653,-21.1,-45.2,276.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-10-28_22-33-49
  done: false
  episode_len_mean: 276.34
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.634000000000114
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 638
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 1.1373557223214044
          entropy_coeff: 0.009999999999999998
          kl: 0.0072915727511359465
          policy_loss: 0.03184932652446959
          total_loss: 1.0067570083671147
          vf_explained_var: 0.1348136067390442
          vf_loss: 0.9862675574918588
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,202,5264.99,202000,-27.634,-21.1,-45.2,276.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-10-28_22-34-16
  done: false
  episode_len_mean: 277.53
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.75300000000012
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 642
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 1.054878252082401
          entropy_coeff: 0.009999999999999998
          kl: 0.0067423380747723164
          policy_loss: 0.018918987777498033
          total_loss: 1.3103727142016093
          vf_explained_var: 0.1580076664686203
          vf_loss: 1.301989867952135
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,203,5291.39,203000,-27.753,-21.1,-45.2,277.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-10-28_22-34-43
  done: false
  episode_len_mean: 276.97
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.69700000000012
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 645
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 1.047485617134306
          entropy_coeff: 0.009999999999999998
          kl: 0.004705543717798995
          policy_loss: -0.10029698626862632
          total_loss: 1.1245852814780342
          vf_explained_var: 0.26232704520225525
          vf_loss: 1.2353482829199898
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 204000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,204,5318.78,204000,-27.697,-21.1,-45.2,276.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-10-28_22-35-08
  done: false
  episode_len_mean: 276.45
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.645000000000127
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 649
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.1035472128126356
          entropy_coeff: 0.009999999999999998
          kl: 0.0053478998594834815
          policy_loss: 0.01295273419883516
          total_loss: 1.3896658817927043
          vf_explained_var: 0.10970483720302582
          vf_loss: 1.3877435949113635
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,205,5343.99,205000,-27.645,-21.1,-45.2,276.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-10-28_22-35-34
  done: false
  episode_len_mean: 277.14
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.714000000000127
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 652
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.052094931072659
          entropy_coeff: 0.009999999999999998
          kl: 0.010689737719704577
          policy_loss: -0.06424555828173956
          total_loss: 1.0394634147485098
          vf_explained_var: 0.13895341753959656
          vf_loss: 1.1142198781172434
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 206000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,206,5369.49,206000,-27.714,-21.1,-45.2,277.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-10-28_22-35-59
  done: false
  episode_len_mean: 277.16
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.71600000000012
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 656
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.0660494844118753
          entropy_coeff: 0.009999999999999998
          kl: 0.013295029747615199
          policy_loss: 0.021818486435545816
          total_loss: 1.0003540641731685
          vf_explained_var: 0.44882163405418396
          vf_loss: 0.9891836057106654
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,207,5395.03,207000,-27.716,-21.1,-45.2,277.16




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-10-28_22-36-42
  done: false
  episode_len_mean: 277.76
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.77600000000012
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 660
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.083374454577764
          entropy_coeff: 0.009999999999999998
          kl: 0.00917156922938543
          policy_loss: 0.03725627387563388
          total_loss: 1.3819734189245436
          vf_explained_var: 0.1933537870645523
          vf_loss: 1.3555422829257118
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,208,5437.66,208000,-27.776,-21.1,-45.2,277.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-10-28_22-37-11
  done: false
  episode_len_mean: 278.68
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.86800000000013
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 663
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.142830647362603
          entropy_coeff: 0.009999999999999998
          kl: 0.0069363532945952815
          policy_loss: -0.09301637924379773
          total_loss: 1.2658921241760255
          vf_explained_var: 0.13386155664920807
          vf_loss: 1.3703303019205728
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 209000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,209,5466.82,209000,-27.868,-21.1,-45.2,278.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-10-28_22-37-36
  done: false
  episode_len_mean: 279.96
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.99600000000013
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 667
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.2063460071881613
          entropy_coeff: 0.009999999999999998
          kl: 0.009964320814584936
          policy_loss: 0.036415239837434554
          total_loss: 1.0911635047859616
          vf_explained_var: 0.14506368339061737
          vf_loss: 1.0668023801512188
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,210,5492.04,210000,-27.996,-21.1,-45.2,279.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-10-28_22-38-00
  done: false
  episode_len_mean: 282.13
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.21300000000013
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 670
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.141518807411194
          entropy_coeff: 0.009999999999999998
          kl: 0.01001003875283538
          policy_loss: -0.08676733407709333
          total_loss: 1.269409105512831
          vf_explained_var: 0.12770527601242065
          vf_loss: 1.3675822456677755
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,211,5515.93,211000,-28.213,-21.8,-45.2,282.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-10-28_22-38-26
  done: false
  episode_len_mean: 284.25
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.425000000000136
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 674
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.1313249839676751
          entropy_coeff: 0.009999999999999998
          kl: 0.0077574892503814215
          policy_loss: 0.01797632657819324
          total_loss: 1.4200096329053242
          vf_explained_var: 0.13042712211608887
          vf_loss: 1.4133392638630338
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 212000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,212,5541.42,212000,-28.425,-21.8,-45.2,284.25


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-10-28_22-38-50
  done: false
  episode_len_mean: 285.59
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.559000000000136
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 677
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.057171399725808
          entropy_coeff: 0.009999999999999998
          kl: 0.011348455424569122
          policy_loss: 0.05022828976313273
          total_loss: 1.0003509206904306
          vf_explained_var: 0.034824784845113754
          vf_loss: 0.9606836894320117
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 213000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,213,5565.15,213000,-28.559,-21.8,-45.2,285.59


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-10-28_22-39-16
  done: false
  episode_len_mean: 287.04
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.704000000000132
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 681
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 0.9199957291285197
          entropy_coeff: 0.009999999999999998
          kl: 0.007548730046218329
          policy_loss: 0.030363693171077306
          total_loss: 1.3533186581399705
          vf_explained_var: 0.054528433829545975
          vf_loss: 1.3321478287378947
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,214,5591.32,214000,-28.704,-21.8,-45.2,287.04


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-10-28_22-39-44
  done: false
  episode_len_mean: 287.38
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.738000000000138
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 685
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 0.8868071834246317
          entropy_coeff: 0.009999999999999998
          kl: 0.008605295942279333
          policy_loss: -0.0016860389875041114
          total_loss: 1.2648736496766408
          vf_explained_var: 0.2584000825881958
          vf_loss: 1.2754196756415896
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 215000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,215,5619.29,215000,-28.738,-21.8,-45.2,287.38


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-10-28_22-40-11
  done: false
  episode_len_mean: 287.5
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.750000000000135
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 688
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 0.894763155778249
          entropy_coeff: 0.009999999999999998
          kl: 0.005556186835470816
          policy_loss: 0.02626074966457155
          total_loss: 1.1578004353576237
          vf_explained_var: 0.1267077475786209
          vf_loss: 1.1404820831285583
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,216,5646.23,216000,-28.75,-21.8,-45.2,287.5




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-10-28_22-40-54
  done: false
  episode_len_mean: 286.09
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.609000000000133
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 692
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 0.9507603757911258
          entropy_coeff: 0.009999999999999998
          kl: 0.005778575857560951
          policy_loss: 0.0038258007003201377
          total_loss: 1.4143942144181993
          vf_explained_var: 0.28556495904922485
          vf_loss: 1.420070587264167
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 217000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,217,5689.28,217000,-28.609,-21.8,-45.2,286.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-10-28_22-41-20
  done: false
  episode_len_mean: 286.22
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.62200000000013
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 695
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.0006381551424661
          entropy_coeff: 0.009999999999999998
          kl: 0.007364455146027006
          policy_loss: -0.09429023596975539
          total_loss: 1.4789654546313815
          vf_explained_var: 0.07718778401613235
          vf_loss: 1.5832551691267225
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,218,5715.31,218000,-28.622,-21.8,-45.2,286.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-10-28_22-41-47
  done: false
  episode_len_mean: 282.91
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.291000000000125
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 699
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.032648316356871
          entropy_coeff: 0.009999999999999998
          kl: 0.006515296479636214
          policy_loss: 0.0020521945423550075
          total_loss: 1.5195515314737955
          vf_explained_var: 0.08496259152889252
          vf_loss: 1.5278197036849128
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,219,5741.99,219000,-28.291,-21.8,-45.2,282.91


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-10-28_22-42-14
  done: false
  episode_len_mean: 282.34
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.234000000000133
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 4
  episodes_total: 703
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 0.9203780591487885
          entropy_coeff: 0.009999999999999998
          kl: 0.005696953831568249
          policy_loss: 0.02688730052775807
          total_loss: 1.3387911690606011
          vf_explained_var: 0.2000974863767624
          vf_loss: 1.3211022860474058
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,220,5769.33,220000,-28.234,-22.1,-45.2,282.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-10-28_22-42-39
  done: false
  episode_len_mean: 280.43
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.04300000000013
  episode_reward_min: -45.20000000000037
  episodes_this_iter: 3
  episodes_total: 706
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 0.959606264034907
          entropy_coeff: 0.009999999999999998
          kl: 0.007873677091324222
          policy_loss: -0.10278771825962596
          total_loss: 1.366813604036967
          vf_explained_var: 0.039102040231227875
          vf_loss: 1.479190014468299
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,221,5794.48,221000,-28.043,-22.1,-45.2,280.43


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-10-28_22-43-07
  done: false
  episode_len_mean: 279.2
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.920000000000126
  episode_reward_min: -36.80000000000025
  episodes_this_iter: 4
  episodes_total: 710
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 0.8450899852646722
          entropy_coeff: 0.009999999999999998
          kl: 0.005043654781477377
          policy_loss: 0.00669951621029112
          total_loss: 1.3915840480062696
          vf_explained_var: 0.15427058935165405
          vf_loss: 1.3933307117886014
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,222,5821.81,222000,-27.92,-22.1,-36.8,279.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-10-28_22-43-33
  done: false
  episode_len_mean: 277.5
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.750000000000128
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 4
  episodes_total: 714
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 0.9504373841815524
          entropy_coeff: 0.009999999999999998
          kl: 0.002906604470220368
          policy_loss: 0.033660253137350084
          total_loss: 1.3373743878470528
          vf_explained_var: 0.1354057341814041
          vf_loss: 1.313215774959988
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,223,5848.6,223000,-27.75,-22.1,-34.5,277.5


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-10-28_22-43-59
  done: false
  episode_len_mean: 278.65
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.865000000000123
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 3
  episodes_total: 717
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004692792892456053
          cur_lr: 5.000000000000001e-05
          entropy: 0.8628014630741543
          entropy_coeff: 0.009999999999999998
          kl: 0.006233575918468483
          policy_loss: 0.01926225274801254
          total_loss: 1.1270601633522246
          vf_explained_var: -0.1438789665699005
          vf_loss: 1.116423004037804
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,224,5874.33,224000,-27.865,-22.1,-34.5,278.65




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-10-28_22-44-43
  done: false
  episode_len_mean: 277.97
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.79700000000013
  episode_reward_min: -33.90000000000021
  episodes_this_iter: 4
  episodes_total: 721
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004692792892456053
          cur_lr: 5.000000000000001e-05
          entropy: 0.9415697448783451
          entropy_coeff: 0.009999999999999998
          kl: 0.006295100869300481
          policy_loss: 0.021023865747782918
          total_loss: 1.0859117368857065
          vf_explained_var: 0.34808534383773804
          vf_loss: 1.0743006202909682
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,225,5918.34,225000,-27.797,-22.1,-33.9,277.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-10-28_22-45-09
  done: false
  episode_len_mean: 277.64
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.764000000000127
  episode_reward_min: -33.90000000000021
  episodes_this_iter: 4
  episodes_total: 725
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004692792892456053
          cur_lr: 5.000000000000001e-05
          entropy: 0.777777738041348
          entropy_coeff: 0.009999999999999998
          kl: 0.01759435842674727
          policy_loss: 0.08022679674128691
          total_loss: 1.3891530242231158
          vf_explained_var: 0.5110216736793518
          vf_loss: 1.316695746448305
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,226,5944.44,226000,-27.764,-22.1,-33.9,277.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-10-28_22-45-37
  done: false
  episode_len_mean: 277.44
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.744000000000128
  episode_reward_min: -33.90000000000021
  episodes_this_iter: 4
  episodes_total: 729
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004692792892456053
          cur_lr: 5.000000000000001e-05
          entropy: 0.8276437487867143
          entropy_coeff: 0.009999999999999998
          kl: 0.007976098401498311
          policy_loss: -0.0015465138687027825
          total_loss: 1.2930035538143583
          vf_explained_var: 0.28216153383255005
          vf_loss: 1.3028227640522851
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,227,5971.8,227000,-27.744,-22.1,-33.9,277.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-10-28_22-46-05
  done: false
  episode_len_mean: 276.6
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.660000000000128
  episode_reward_min: -33.90000000000021
  episodes_this_iter: 4
  episodes_total: 733
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004692792892456053
          cur_lr: 5.000000000000001e-05
          entropy: 0.7245967719289992
          entropy_coeff: 0.009999999999999998
          kl: 0.006134231452651898
          policy_loss: 0.022869208289517298
          total_loss: 0.9578894419802559
          vf_explained_var: 0.4780898988246918
          vf_loss: 0.9422633091608683
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 228000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,228,6000.25,228000,-27.66,-22.1,-33.9,276.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-10-28_22-46-34
  done: false
  episode_len_mean: 273.92
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.39200000000012
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 4
  episodes_total: 737
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004692792892456053
          cur_lr: 5.000000000000001e-05
          entropy: 0.6074805166986254
          entropy_coeff: 0.009999999999999998
          kl: 0.006898636458299892
          policy_loss: 0.025506802399953208
          total_loss: 0.9727894597583346
          vf_explained_var: 0.6190515160560608
          vf_loss: 0.9533542301919725
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,229,6029.1,229000,-27.392,-22.1,-32.5,273.92


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-10-28_22-47-02
  done: false
  episode_len_mean: 272.98
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.298000000000116
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 4
  episodes_total: 741
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004692792892456053
          cur_lr: 5.000000000000001e-05
          entropy: 0.8528289669089847
          entropy_coeff: 0.009999999999999998
          kl: 0.04554550590614867
          policy_loss: 0.028624242254429392
          total_loss: 1.9311457249853345
          vf_explained_var: 0.031624916940927505
          vf_loss: 1.9110284076796638
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,230,6057.45,230000,-27.298,-22.1,-32.5,272.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-10-28_22-47-30
  done: false
  episode_len_mean: 272.84
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.28400000000012
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 3
  episodes_total: 744
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007039189338684084
          cur_lr: 5.000000000000001e-05
          entropy: 0.9046689331531524
          entropy_coeff: 0.009999999999999998
          kl: 0.005418968174985089
          policy_loss: 0.0033378158178594378
          total_loss: 1.1334520853228038
          vf_explained_var: 0.0780976265668869
          vf_loss: 1.139157157142957
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 231000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,231,6084.69,231000,-27.284,-22.1,-32.5,272.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-10-28_22-47-54
  done: false
  episode_len_mean: 273.47
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.347000000000122
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 4
  episodes_total: 748
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007039189338684084
          cur_lr: 5.000000000000001e-05
          entropy: 0.9673874874909719
          entropy_coeff: 0.009999999999999998
          kl: 0.00591876850411788
          policy_loss: 0.010279818955394957
          total_loss: 1.700628301832411
          vf_explained_var: 0.03418874740600586
          vf_loss: 1.7000181939866807
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,232,6109.09,232000,-27.347,-22.1,-32.5,273.47




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-10-28_22-48-37
  done: false
  episode_len_mean: 273.25
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.325000000000117
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 3
  episodes_total: 751
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007039189338684084
          cur_lr: 5.000000000000001e-05
          entropy: 0.9343327873282963
          entropy_coeff: 0.009999999999999998
          kl: 0.004391798784891303
          policy_loss: 0.032230697655015524
          total_loss: 1.122067293855879
          vf_explained_var: -0.05906525254249573
          vf_loss: 1.0991768406497107
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 233000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,233,6152.14,233000,-27.325,-22.1,-32.5,273.25


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-10-28_22-49-03
  done: false
  episode_len_mean: 273.34
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.33400000000012
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 4
  episodes_total: 755
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003519594669342042
          cur_lr: 5.000000000000001e-05
          entropy: 0.9518539382351769
          entropy_coeff: 0.009999999999999998
          kl: 0.004602424192581367
          policy_loss: 0.008240746292803023
          total_loss: 1.6761963433689542
          vf_explained_var: 0.050155237317085266
          vf_loss: 1.6774725172254774
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 234000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,234,6178.06,234000,-27.334,-22.1,-32.5,273.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-10-28_22-49-26
  done: false
  episode_len_mean: 273.86
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.386000000000124
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 3
  episodes_total: 758
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001759797334671021
          cur_lr: 5.000000000000001e-05
          entropy: 1.0168355060948266
          entropy_coeff: 0.009999999999999998
          kl: 0.006518114852994788
          policy_loss: -0.046909770535098184
          total_loss: 1.0138135297430886
          vf_explained_var: -0.2291511595249176
          vf_loss: 1.0708905171602965
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,235,6201.37,235000,-27.386,-22.1,-32.5,273.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-10-28_22-49-48
  done: false
  episode_len_mean: 275.93
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.593000000000117
  episode_reward_min: -35.70000000000024
  episodes_this_iter: 3
  episodes_total: 761
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001759797334671021
          cur_lr: 5.000000000000001e-05
          entropy: 0.9704301423496671
          entropy_coeff: 0.009999999999999998
          kl: 0.012360600336519114
          policy_loss: -0.03714003894064161
          total_loss: 1.1707042309972975
          vf_explained_var: 0.08525412529706955
          vf_loss: 1.2175464015040132
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 236000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,236,6222.72,236000,-27.593,-22.1,-35.7,275.93


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-10-28_22-50-14
  done: false
  episode_len_mean: 276.67
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.667000000000122
  episode_reward_min: -35.70000000000024
  episodes_this_iter: 4
  episodes_total: 765
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001759797334671021
          cur_lr: 5.000000000000001e-05
          entropy: 0.876914542251163
          entropy_coeff: 0.009999999999999998
          kl: 0.0054828555066593135
          policy_loss: 0.03817020646399922
          total_loss: 1.346152396996816
          vf_explained_var: 0.1178799420595169
          vf_loss: 1.3167503754297891
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 237000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,237,6248.6,237000,-27.667,-22.1,-35.7,276.67


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-10-28_22-50-39
  done: false
  episode_len_mean: 276.23
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.623000000000115
  episode_reward_min: -35.70000000000024
  episodes_this_iter: 3
  episodes_total: 768
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001759797334671021
          cur_lr: 5.000000000000001e-05
          entropy: 0.9205922497643365
          entropy_coeff: 0.009999999999999998
          kl: 0.004397253496933157
          policy_loss: -0.03900704731543859
          total_loss: 1.4046115769280327
          vf_explained_var: -0.020011913031339645
          vf_loss: 1.452823770046234
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 238000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,238,6274.16,238000,-27.623,-22.1,-35.7,276.23


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-10-28_22-51-03
  done: false
  episode_len_mean: 275.92
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.592000000000116
  episode_reward_min: -35.70000000000024
  episodes_this_iter: 3
  episodes_total: 771
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.798986673355105e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9465121017562018
          entropy_coeff: 0.009999999999999998
          kl: 0.008064284615274748
          policy_loss: -0.09493482212225596
          total_loss: 1.558587998814053
          vf_explained_var: 0.04798077419400215
          vf_loss: 1.6629872308837044
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 239000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,239,6298.27,239000,-27.592,-22.1,-35.7,275.92


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-10-28_22-51-28
  done: false
  episode_len_mean: 276.69
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.669000000000114
  episode_reward_min: -35.70000000000024
  episodes_this_iter: 4
  episodes_total: 775
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.798986673355105e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9705772373411391
          entropy_coeff: 0.009999999999999998
          kl: 0.007639340033321624
          policy_loss: 0.024431411425272623
          total_loss: 1.4865494648615518
          vf_explained_var: 0.09589090198278427
          vf_loss: 1.471823145283593
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,240,6323.39,240000,-27.669,-22.1,-35.7,276.69


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-10-28_22-51-53
  done: false
  episode_len_mean: 276.85
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.685000000000123
  episode_reward_min: -35.70000000000024
  episodes_this_iter: 3
  episodes_total: 778
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.798986673355105e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.0301869498358833
          entropy_coeff: 0.009999999999999998
          kl: 0.009133587268978892
          policy_loss: 0.02252484957377116
          total_loss: 1.2359994563791488
          vf_explained_var: -0.03430386260151863
          vf_loss: 1.2237756705946392
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 241000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,241,6347.5,241000,-27.685,-22.1,-35.7,276.85




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-10-28_22-52-32
  done: false
  episode_len_mean: 278.39
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.839000000000127
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 3
  episodes_total: 781
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.798986673355105e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.0022911661201053
          entropy_coeff: 0.009999999999999998
          kl: 0.01577519091726111
          policy_loss: -0.05314381602737639
          total_loss: 1.1357286946641074
          vf_explained_var: 0.13895538449287415
          vf_loss: 1.1988940306421783
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 242000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,242,6386.81,242000,-27.839,-22.1,-36.7,278.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-10-28_22-52-57
  done: false
  episode_len_mean: 280.22
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.022000000000126
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 3
  episodes_total: 784
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.798986673355105e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9084832787513732
          entropy_coeff: 0.009999999999999998
          kl: 0.012270998208918726
          policy_loss: -0.11141384806897905
          total_loss: 1.5722250037723118
          vf_explained_var: 0.03735209256410599
          vf_loss: 1.6927226013607448
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 243000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,243,6411.74,243000,-28.022,-22.1,-36.7,280.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-10-28_22-53-22
  done: false
  episode_len_mean: 280.9
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.09000000000013
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 4
  episodes_total: 788
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.798986673355105e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.8388968467712402
          entropy_coeff: 0.009999999999999998
          kl: 0.002849305802126641
          policy_loss: 0.023840496275160047
          total_loss: 1.6224363101853265
          vf_explained_var: 0.055041711777448654
          vf_loss: 1.6069845318794251
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 244000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,244,6437.09,244000,-28.09,-22.1,-36.7,280.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-10-28_22-53-49
  done: false
  episode_len_mean: 280.94
  episode_media: {}
  episode_reward_max: -23.600000000000065
  episode_reward_mean: -28.094000000000122
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 4
  episodes_total: 792
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3994933366775524e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.8577484336164263
          entropy_coeff: 0.009999999999999998
          kl: 0.0008028399799482679
          policy_loss: 0.0051330865257316165
          total_loss: 1.608640934361352
          vf_explained_var: 0.048423733562231064
          vf_loss: 1.6120852894253201
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 245000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,245,6463.53,245000,-28.094,-23.6,-36.7,280.94


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-10-28_22-54-14
  done: false
  episode_len_mean: 280.32
  episode_media: {}
  episode_reward_max: -23.600000000000065
  episode_reward_mean: -28.03200000000013
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 3
  episodes_total: 795
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1997466683387762e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.8378471824857924
          entropy_coeff: 0.009999999999999998
          kl: 0.003074427805954135
          policy_loss: -0.05115663334727287
          total_loss: 1.066135905848609
          vf_explained_var: 0.20392800867557526
          vf_loss: 1.1256709499491586
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained: 246000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,246,6489.07,246000,-28.032,-23.6,-36.7,280.32


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-10-28_22-54-42
  done: false
  episode_len_mean: 280.23
  episode_media: {}
  episode_reward_max: -23.600000000000065
  episode_reward_mean: -28.023000000000128
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 4
  episodes_total: 799
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0998733341693881e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7478473140133752
          entropy_coeff: 0.009999999999999998
          kl: 0.012328936755169788
          policy_loss: 0.004946481188138326
          total_loss: 1.2581028329001533
          vf_explained_var: 0.40901684761047363
          vf_loss: 1.260634704430898
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 247000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,247,6516.31,247000,-28.023,-23.6,-36.7,280.23


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-10-28_22-55-10
  done: false
  episode_len_mean: 279.64
  episode_media: {}
  episode_reward_max: -23.10000000000006
  episode_reward_mean: -27.964000000000127
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 4
  episodes_total: 803
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0998733341693881e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6998374183972677
          entropy_coeff: 0.009999999999999998
          kl: 0.008634414389491595
          policy_loss: 0.014793388090199894
          total_loss: 1.4088883757591248
          vf_explained_var: 0.33202382922172546
          vf_loss: 1.4010932703812917
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 248000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,248,6545.11,248000,-27.964,-23.1,-36.7,279.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-10-28_22-55-37
  done: false
  episode_len_mean: 278.64
  episode_media: {}
  episode_reward_max: -23.10000000000006
  episode_reward_mean: -27.864000000000132
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 4
  episodes_total: 807
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0998733341693881e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7490528881549835
          entropy_coeff: 0.009999999999999998
          kl: 0.007710296500645026
          policy_loss: 0.0343911183377107
          total_loss: 1.075367890463935
          vf_explained_var: 0.5220416188240051
          vf_loss: 1.048467218875885
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 249000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,249,6571.52,249000,-27.864,-23.1,-36.7,278.64




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-10-28_22-56-22
  done: false
  episode_len_mean: 277.54
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.754000000000122
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 4
  episodes_total: 811
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0998733341693881e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7376561615202162
          entropy_coeff: 0.009999999999999998
          kl: 0.006517041761935616
          policy_loss: 0.03607049783070882
          total_loss: 1.011073096593221
          vf_explained_var: 0.6444697380065918
          vf_loss: 0.9823790894614326
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,250,6617.19,250000,-27.754,-20.4,-36.7,277.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-10-28_22-56-50
  done: false
  episode_len_mean: 277.35
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.735000000000117
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 4
  episodes_total: 815
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0998733341693881e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7813095920615726
          entropy_coeff: 0.009999999999999998
          kl: 0.0075028994605214035
          policy_loss: -0.10979679628378815
          total_loss: 0.7917251371675067
          vf_explained_var: 0.7064843773841858
          vf_loss: 0.9093349450164371
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained: 251000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,251,6644.55,251000,-27.735,-20.4,-36.7,277.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-10-28_22-57-19
  done: false
  episode_len_mean: 276.12
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.612000000000126
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 4
  episodes_total: 819
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0998733341693881e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.751933421028985
          entropy_coeff: 0.009999999999999998
          kl: 0.006627383790288377
          policy_loss: -0.12484141579932637
          total_loss: 0.6405275020334456
          vf_explained_var: 0.7060587406158447
          vf_loss: 0.7728881888919407
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trained: 252000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,252,6673.19,252000,-27.612,-20.4,-36.7,276.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-10-28_22-57-47
  done: false
  episode_len_mean: 275.59
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.559000000000125
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 4
  episodes_total: 823
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0998733341693881e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7315490908092923
          entropy_coeff: 0.009999999999999998
          kl: 0.0020718042040262694
          policy_loss: 0.009243120004733403
          total_loss: 0.5877789855003357
          vf_explained_var: 0.8344885110855103
          vf_loss: 0.5858513428105249
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained: 253000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,253,6701.53,253000,-27.559,-20.4,-36.7,275.59


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-10-28_22-58-15
  done: false
  episode_len_mean: 275.65
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.565000000000126
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 4
  episodes_total: 827
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.4993666708469404e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7142697427007887
          entropy_coeff: 0.009999999999999998
          kl: 0.0017870050178346592
          policy_loss: 0.07349478685193592
          total_loss: 0.5786858638127644
          vf_explained_var: 0.8637993931770325
          vf_loss: 0.512333776222335
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained: 254000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,254,6729.94,254000,-27.565,-20.4,-36.7,275.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-10-28_22-58-41
  done: false
  episode_len_mean: 275.75
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.575000000000124
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 3
  episodes_total: 830
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7496833354234702e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.746637315220303
          entropy_coeff: 0.009999999999999998
          kl: 0.008716577774902293
          policy_loss: -0.03017826428016027
          total_loss: 0.2013991636534532
          vf_explained_var: 0.9387415647506714
          vf_loss: 0.23904378563165665
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 255000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,255,6755.97,255000,-27.575,-20.4,-36.7,275.75


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-10-28_22-59-11
  done: false
  episode_len_mean: 275.33
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.533000000000126
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 5
  episodes_total: 835
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7496833354234702e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.62045802142885
          entropy_coeff: 0.009999999999999998
          kl: 0.007871673398419116
          policy_loss: 0.02495969898170895
          total_loss: 0.921447237332662
          vf_explained_var: 0.7330175042152405
          vf_loss: 0.9026920997434192
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 256000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,256,6785.28,256000,-27.533,-20.4,-36.7,275.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-10-28_22-59-39
  done: false
  episode_len_mean: 275.09
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.509000000000125
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 3
  episodes_total: 838
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7496833354234702e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7855299168162876
          entropy_coeff: 0.009999999999999998
          kl: 0.008474113248136126
          policy_loss: -0.047798577116595374
          total_loss: 0.5301873919036654
          vf_explained_var: 0.773392915725708
          vf_loss: 0.5858412553866704
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 257000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,257,6813.92,257000,-27.509,-20.4,-36.7,275.09




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-10-28_23-00-27
  done: false
  episode_len_mean: 274.33
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -27.433000000000124
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 5
  episodes_total: 843
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7496833354234702e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8979960362116496
          entropy_coeff: 0.009999999999999998
          kl: 0.015446691061868497
          policy_loss: 0.0028823599219322204
          total_loss: 0.5286315755711661
          vf_explained_var: 0.7522438168525696
          vf_loss: 0.5347291337119209
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained: 258000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,258,6861.26,258000,-27.433,-20.3,-36.7,274.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-10-28_23-00-55
  done: false
  episode_len_mean: 273.46
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -27.346000000000114
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 4
  episodes_total: 847
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7496833354234702e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.889164865679211
          entropy_coeff: 0.009999999999999998
          kl: 0.005260959025106812
          policy_loss: 0.037001269807418184
          total_loss: 0.3395590872814258
          vf_explained_var: 0.9440783262252808
          vf_loss: 0.3114494596918424
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_trained: 259000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,259,6889.13,259000,-27.346,-20.3,-36.7,273.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-10-28_23-01-22
  done: false
  episode_len_mean: 272.94
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -27.294000000000114
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 3
  episodes_total: 850
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7496833354234702e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1146948125627305
          entropy_coeff: 0.009999999999999998
          kl: 0.039808181591901436
          policy_loss: 0.018699563211864896
          total_loss: 0.28401802852749825
          vf_explained_var: 0.9084996581077576
          vf_loss: 0.27646530750725007
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,260,6916.33,260000,-27.294,-20.3,-36.7,272.94


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-10-28_23-01-48
  done: false
  episode_len_mean: 272.27
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -27.227000000000118
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 4
  episodes_total: 854
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.124525003135205e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2321013424131606
          entropy_coeff: 0.009999999999999998
          kl: 0.010061356545012619
          policy_loss: 0.04116687070992258
          total_loss: 0.4326728572448095
          vf_explained_var: 0.9165179133415222
          vf_loss: 0.4038269639015198
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 261000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,261,6942.44,261000,-27.227,-20.3,-36.7,272.27


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-10-28_23-02-15
  done: false
  episode_len_mean: 271.88
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -27.188000000000113
  episode_reward_min: -36.70000000000025
  episodes_this_iter: 4
  episodes_total: 858
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.124525003135205e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3622505399915907
          entropy_coeff: 0.009999999999999998
          kl: 0.038755757087982644
          policy_loss: -0.08437446152998342
          total_loss: 0.7063555950919788
          vf_explained_var: 0.7663059830665588
          vf_loss: 0.8043524180849393
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_trained: 262000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,262,6969.01,262000,-27.188,-20.3,-36.7,271.88


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-10-28_23-02-37
  done: false
  episode_len_mean: 271.78
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -27.17800000000012
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 861
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.186787504702804e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.4857451266712614
          entropy_coeff: 0.009999999999999998
          kl: 0.02288703572774627
          policy_loss: -0.13999225166108872
          total_loss: 0.572817305723826
          vf_explained_var: 0.7358623147010803
          vf_loss: 0.7276668555206722
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_trained: 263000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,263,6991.46,263000,-27.178,-20.3,-41.6,271.78


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-10-28_23-02-57
  done: false
  episode_len_mean: 273.0
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -27.300000000000118
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 2
  episodes_total: 863
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.280181257054212e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.6051893353462219
          entropy_coeff: 0.009999999999999998
          kl: 0.02421037501703521
          policy_loss: -0.035866548948817786
          total_loss: 0.7917338278558519
          vf_explained_var: 0.4617060422897339
          vf_loss: 0.8436520501971245
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 264000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,264,7011.37,264000,-27.3,-20.3,-41.6,273


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-10-28_23-03-13
  done: false
  episode_len_mean: 276.22
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -27.622000000000124
  episode_reward_min: -46.000000000000384
  episodes_this_iter: 2
  episodes_total: 865
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3920271885581315e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4687656574779087
          entropy_coeff: 0.009999999999999998
          kl: 0.017905425512727093
          policy_loss: -0.08527421851952871
          total_loss: 0.8763406548235152
          vf_explained_var: 0.16968728601932526
          vf_loss: 0.9763022699289852
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_trained: 265000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,265,7027.64,265000,-27.622,-20.3,-46,276.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-10-28_23-03-31
  done: false
  episode_len_mean: 281.27
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.127000000000134
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 868
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3920271885581315e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4924886928664314
          entropy_coeff: 0.009999999999999998
          kl: 0.015410045745531776
          policy_loss: 0.06092242747545242
          total_loss: 1.034399922688802
          vf_explained_var: -0.013236074708402157
          vf_loss: 0.988402168204387
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained: 266000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,266,7045,266000,-28.127,-20.3,-46.4,281.27




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-10-28_23-04-05
  done: false
  episode_len_mean: 283.59
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.359000000000133
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 2
  episodes_total: 870
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3920271885581315e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4309436096085442
          entropy_coeff: 0.009999999999999998
          kl: 0.013107183187752645
          policy_loss: -0.07823993778891034
          total_loss: 0.968181182609664
          vf_explained_var: 0.10439285635948181
          vf_loss: 1.0607303522941138
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 267000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,267,7079.06,267000,-28.359,-20.3,-46.4,283.59


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-10-28_23-04-26
  done: false
  episode_len_mean: 286.54
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.65400000000013
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 873
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3920271885581315e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.3491982539494833
          entropy_coeff: 0.009999999999999998
          kl: 0.007132395431980301
          policy_loss: 0.05004212963912222
          total_loss: 1.108564516570833
          vf_explained_var: 0.01493342686444521
          vf_loss: 1.0720142828093635
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 268000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,268,7100.22,268000,-28.654,-20.3,-46.4,286.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-10-28_23-04-48
  done: false
  episode_len_mean: 287.13
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.71300000000014
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 876
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3920271885581315e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.2930557356940375
          entropy_coeff: 0.009999999999999998
          kl: 0.011090603257018728
          policy_loss: 0.05520208279291789
          total_loss: 1.0943413035737144
          vf_explained_var: -0.10731089860200882
          vf_loss: 1.0520696351511611
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_trained: 269000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,269,7122.62,269000,-28.713,-20.3,-46.4,287.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-10-28_23-05-08
  done: false
  episode_len_mean: 289.35
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.935000000000144
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 2
  episodes_total: 878
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3920271885581315e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.2988288627730475
          entropy_coeff: 0.009999999999999998
          kl: 0.012010017153435233
          policy_loss: -0.07877307352092532
          total_loss: 1.0034787697924508
          vf_explained_var: -0.1841343492269516
          vf_loss: 1.0952399680183993
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,270,7142.09,270000,-28.935,-20.3,-46.4,289.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-10-28_23-05-31
  done: false
  episode_len_mean: 289.64
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.96400000000014
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 881
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3920271885581315e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.219526720046997
          entropy_coeff: 0.009999999999999998
          kl: 0.004563571956626669
          policy_loss: -0.09360203014479743
          total_loss: 1.4577303727467854
          vf_explained_var: 0.047652557492256165
          vf_loss: 1.5635276092423334
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained: 271000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,271,7165.58,271000,-28.964,-20.3,-46.4,289.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-10-28_23-05-53
  done: false
  episode_len_mean: 289.62
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.962000000000145
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 884
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.960135942790658e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1429546078046162
          entropy_coeff: 0.009999999999999998
          kl: 0.005525797011257271
          policy_loss: -0.11024442985653878
          total_loss: 1.376090853744083
          vf_explained_var: 0.12091179192066193
          vf_loss: 1.497764798005422
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 272000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,272,7187.59,272000,-28.962,-20.3,-46.4,289.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-10-28_23-06-19
  done: false
  episode_len_mean: 290.39
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -29.039000000000144
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 888
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.960135942790658e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0831575406922234
          entropy_coeff: 0.009999999999999998
          kl: 0.011028280650783687
          policy_loss: 0.025588286833630666
          total_loss: 1.3932104998164707
          vf_explained_var: 0.12060458958148956
          vf_loss: 1.3784537023968166
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained: 273000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,273,7212.7,273000,-29.039,-20.3,-46.4,290.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-10-28_23-06-42
  done: false
  episode_len_mean: 291.64
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -29.164000000000144
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 891
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.960135942790658e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.276448557111952
          entropy_coeff: 0.009999999999999998
          kl: 0.006553855661377068
          policy_loss: 0.06350315577454037
          total_loss: 0.9370275331868065
          vf_explained_var: 0.11773204803466797
          vf_loss: 0.8862888256708781
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 274000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,274,7236.45,274000,-29.164,-20.3,-46.4,291.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-10-28_23-07-07
  done: false
  episode_len_mean: 293.02
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -29.30200000000015
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 894
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.960135942790658e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2715910779105293
          entropy_coeff: 0.009999999999999998
          kl: 0.012157389295585474
          policy_loss: 0.014002150545517603
          total_loss: 1.0768057694037756
          vf_explained_var: -0.014246919192373753
          vf_loss: 1.0755194523268277
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained: 275000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,275,7260.95,275000,-29.302,-20.3,-46.4,293.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-10-28_23-07-31
  done: false
  episode_len_mean: 294.31
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -29.431000000000143
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 897
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.960135942790658e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1959950990147061
          entropy_coeff: 0.009999999999999998
          kl: 0.00833602404315205
          policy_loss: -0.0891981272233857
          total_loss: 1.068628951576021
          vf_explained_var: 0.1244247630238533
          vf_loss: 1.1697869764433966
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained: 276000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,276,7285.14,276000,-29.431,-20.3,-46.4,294.31




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-10-28_23-08-10
  done: false
  episode_len_mean: 296.24
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -29.62400000000015
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 901
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.960135942790658e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1832209878497653
          entropy_coeff: 0.009999999999999998
          kl: 0.0114633313543187
          policy_loss: 0.012484932525290383
          total_loss: 1.6033814867337546
          vf_explained_var: 0.06394818425178528
          vf_loss: 1.602728713883294
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained: 277000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,277,7323.94,277000,-29.624,-20.3,-46.4,296.24


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-10-28_23-08-34
  done: false
  episode_len_mean: 298.08
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -29.808000000000156
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 904
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.960135942790658e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1778859085506863
          entropy_coeff: 0.009999999999999998
          kl: 0.008479722724271315
          policy_loss: 0.08161966005961101
          total_loss: 0.8387359966834386
          vf_explained_var: 0.03533700853586197
          vf_loss: 0.7688951386345757
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained: 278000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,278,7347.67,278000,-29.808,-20.3,-46.4,298.08


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-10-28_23-08-57
  done: false
  episode_len_mean: 299.2
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -29.92000000000015
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 907
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.960135942790658e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.096202896700965
          entropy_coeff: 0.009999999999999998
          kl: 0.008503850476867096
          policy_loss: 0.0646354208389918
          total_loss: 0.832000106241968
          vf_explained_var: -0.184159517288208
          vf_loss: 0.778326665237546
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained: 279000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,279,7370.97,279000,-29.92,-20.3,-46.4,299.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-10-28_23-09-21
  done: false
  episode_len_mean: 301.3
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.130000000000155
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 910
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.960135942790658e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2014628105693392
          entropy_coeff: 0.009999999999999998
          kl: 0.00868236026391654
          policy_loss: -0.05993852847152286
          total_loss: 1.4349404719140795
          vf_explained_var: 0.0008322974317707121
          vf_loss: 1.5068935778405932
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,280,7395.16,280000,-30.13,-20.3,-46.4,301.3


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-10-28_23-09-45
  done: false
  episode_len_mean: 302.53
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.25300000000016
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 913
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.960135942790658e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.134267274538676
          entropy_coeff: 0.009999999999999998
          kl: 0.014743212382495624
          policy_loss: -0.10838004185093773
          total_loss: 1.3943652596738603
          vf_explained_var: 0.18585160374641418
          vf_loss: 1.5140878783331977
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained: 281000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,281,7418.82,281000,-30.253,-20.3,-46.4,302.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-10-28_23-10-08
  done: false
  episode_len_mean: 304.77
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.47700000000017
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 917
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.960135942790658e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0618585454093086
          entropy_coeff: 0.009999999999999998
          kl: 0.006322275209919958
          policy_loss: 0.04882435906264517
          total_loss: 1.290186259812779
          vf_explained_var: 0.24450001120567322
          vf_loss: 1.2519804345236885
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained: 282000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,282,7441.95,282000,-30.477,-20.3,-46.4,304.77


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-10-28_23-10-35
  done: false
  episode_len_mean: 304.97
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.497000000000163
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 920
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.960135942790658e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9263662815093994
          entropy_coeff: 0.009999999999999998
          kl: 0.01791167896433308
          policy_loss: -0.05944497626688745
          total_loss: 1.7474558326933118
          vf_explained_var: 0.4197888672351837
          vf_loss: 1.8161643637551201
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_trained: 283000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,283,7469.36,283000,-30.497,-20.3,-46.4,304.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-10-28_23-10-58
  done: false
  episode_len_mean: 307.23
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.72300000000017
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 923
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.960135942790658e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1047041760550604
          entropy_coeff: 0.009999999999999998
          kl: 0.004457110391146636
          policy_loss: -0.09797134713994132
          total_loss: 1.3882112907038795
          vf_explained_var: 0.23596562445163727
          vf_loss: 1.4972296648555332
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 284000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,284,7491.92,284000,-30.723,-20.3,-46.4,307.23


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-10-28_23-11-23
  done: false
  episode_len_mean: 308.87
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.887000000000167
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 927
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.480067971395329e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0325772921244303
          entropy_coeff: 0.009999999999999998
          kl: 0.0033569681107283125
          policy_loss: -0.10575080911318462
          total_loss: 1.047917029592726
          vf_explained_var: 0.37185585498809814
          vf_loss: 1.163993618885676
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_trained: 285000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,285,7516.64,285000,-30.887,-20.3,-46.4,308.87




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-10-28_23-12-05
  done: false
  episode_len_mean: 308.98
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.89800000000017
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 930
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7400339856976644e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9486669301986694
          entropy_coeff: 0.009999999999999998
          kl: 0.0070416673164118235
          policy_loss: -0.07999036543899113
          total_loss: 0.8718650076124403
          vf_explained_var: 0.6462932825088501
          vf_loss: 0.9613420340749953
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained: 286000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,286,7559.03,286000,-30.898,-20.3,-46.4,308.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-10-28_23-12-30
  done: false
  episode_len_mean: 311.47
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -31.147000000000176
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 934
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7400339856976644e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9214653227064344
          entropy_coeff: 0.009999999999999998
          kl: 0.010870286235816708
          policy_loss: 0.03720460755543576
          total_loss: 1.6172399666574266
          vf_explained_var: 0.13366232812404633
          vf_loss: 1.5892499910460578
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained: 287000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,287,7583.77,287000,-31.147,-20.3,-46.4,311.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-10-28_23-12-57
  done: false
  episode_len_mean: 312.07
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -31.207000000000175
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 938
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7400339856976644e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8986053393946754
          entropy_coeff: 0.009999999999999998
          kl: 0.0061518475881143
          policy_loss: -0.013377763662073347
          total_loss: 1.0790573166476356
          vf_explained_var: 0.3144305646419525
          vf_loss: 1.1014211263921525
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 288000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,288,7610.47,288000,-31.207,-20.3,-46.4,312.07


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-10-28_23-13-26
  done: false
  episode_len_mean: 311.95
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -31.195000000000167
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 942
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7400339856976644e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7542121834225125
          entropy_coeff: 0.009999999999999998
          kl: 0.004980094177663751
          policy_loss: -0.0036205608811643386
          total_loss: 0.9568445331520504
          vf_explained_var: 0.34234246611595154
          vf_loss: 0.9680072214868334
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_trained: 289000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,289,7639.63,289000,-31.195,-23.5,-46.4,311.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-10-28_23-13-54
  done: false
  episode_len_mean: 311.85
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -31.185000000000176
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 946
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.700169928488322e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7479820324314965
          entropy_coeff: 0.009999999999999998
          kl: 0.004197859105567522
          policy_loss: -0.0005755451818307241
          total_loss: 1.0930973533127042
          vf_explained_var: 0.33599141240119934
          vf_loss: 1.1011527200539908
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 290000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,290,7668.08,290000,-31.185,-23.5,-46.4,311.85


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-10-28_23-14-24
  done: false
  episode_len_mean: 311.14
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -31.114000000000175
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 950
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.350084964244161e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6872472590870328
          entropy_coeff: 0.009999999999999998
          kl: 0.007695898748369719
          policy_loss: 0.039846902506219015
          total_loss: 0.9181028154161242
          vf_explained_var: 0.32325685024261475
          vf_loss: 0.8851283881399367
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_trained: 291000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,291,7697.74,291000,-31.114,-23.5,-46.4,311.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-10-28_23-14-53
  done: false
  episode_len_mean: 309.96
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -30.996000000000162
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 954
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.350084964244161e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7065731512175666
          entropy_coeff: 0.009999999999999998
          kl: 0.003362243167054007
          policy_loss: 0.034943792803419964
          total_loss: 0.7465398924218284
          vf_explained_var: 0.6192364692687988
          vf_loss: 0.718661829829216
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained: 292000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,292,7727.09,292000,-30.996,-23.5,-46.4,309.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-10-28_23-15-22
  done: false
  episode_len_mean: 308.49
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -30.849000000000164
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 958
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1750424821220805e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7326034956508213
          entropy_coeff: 0.009999999999999998
          kl: 0.003569776150615588
          policy_loss: 0.016419279989269044
          total_loss: 0.7184118327167299
          vf_explained_var: 0.6128211617469788
          vf_loss: 0.7093185977803336
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 293000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,293,7756.23,293000,-30.849,-23.5,-46.4,308.49




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-10-28_23-16-08
  done: false
  episode_len_mean: 305.3
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -30.53000000000017
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 962
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0875212410610403e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8550807787312402
          entropy_coeff: 0.009999999999999998
          kl: 0.00789027605123067
          policy_loss: -0.023907201157675848
          total_loss: 1.5827973392274646
          vf_explained_var: 0.30558496713638306
          vf_loss: 1.6152553492122226
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained: 294000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,294,7802.14,294000,-30.53,-23.5,-46.4,305.3


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-10-28_23-16-35
  done: false
  episode_len_mean: 299.09
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -29.90900000000015
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 966
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0875212410610403e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8638442390494876
          entropy_coeff: 0.009999999999999998
          kl: 0.0025919060526049826
          policy_loss: 0.02593527233435048
          total_loss: 1.3355783700942994
          vf_explained_var: 0.3003523051738739
          vf_loss: 1.3182815233866374
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 295000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,295,7829.07,295000,-29.909,-23.5,-46.4,299.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-10-28_23-17-00
  done: false
  episode_len_mean: 293.97
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -29.397000000000148
  episode_reward_min: -44.30000000000036
  episodes_this_iter: 3
  episodes_total: 969
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.437606205305201e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8236878607008192
          entropy_coeff: 0.009999999999999998
          kl: 0.0037292161377009215
          policy_loss: -0.0937203141550223
          total_loss: 1.1512894570827483
          vf_explained_var: 0.27278903126716614
          vf_loss: 1.2532466537422604
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 296000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,296,7853.7,296000,-29.397,-23.5,-44.3,293.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-10-28_23-17-28
  done: false
  episode_len_mean: 288.79
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -28.879000000000136
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 973
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7188031026526006e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8067236973179711
          entropy_coeff: 0.009999999999999998
          kl: 0.004423618128671889
          policy_loss: -0.028937179678016237
          total_loss: 1.440226145585378
          vf_explained_var: 0.12470460683107376
          vf_loss: 1.4772305806477866
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained: 297000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,297,7881.27,297000,-28.879,-23.5,-40.7,288.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-10-28_23-17-56
  done: false
  episode_len_mean: 285.02
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -28.502000000000134
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 977
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3594015513263003e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7840675734811359
          entropy_coeff: 0.009999999999999998
          kl: 0.009372417484613118
          policy_loss: 0.022846109254492653
          total_loss: 1.3356117301517063
          vf_explained_var: 0.2579268515110016
          vf_loss: 1.3206063191095987
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 298000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,298,7909.39,298000,-28.502,-23.5,-40.7,285.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-10-28_23-18-22
  done: false
  episode_len_mean: 281.83
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -28.183000000000128
  episode_reward_min: -37.10000000000026
  episodes_this_iter: 4
  episodes_total: 981
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3594015513263003e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8052242232693566
          entropy_coeff: 0.009999999999999998
          kl: 0.004496751662426175
          policy_loss: 0.019406100279755062
          total_loss: 1.5325598107443916
          vf_explained_var: 0.01278723869472742
          vf_loss: 1.5212059418360393
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained: 299000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,299,7935.57,299000,-28.183,-23.5,-37.1,281.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-10-28_23-18-50
  done: false
  episode_len_mean: 279.1
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -27.910000000000124
  episode_reward_min: -36.60000000000025
  episodes_this_iter: 4
  episodes_total: 985
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8128275897767808
          entropy_coeff: 0.009999999999999998
          kl: 0.005661748451294339
          policy_loss: 7.97265519698461e-05
          total_loss: 1.3797187195883858
          vf_explained_var: 0.16024987399578094
          vf_loss: 1.3877672771612803
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,300,7963.69,300000,-27.91,-23.5,-36.6,279.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-10-28_23-19-17
  done: false
  episode_len_mean: 278.09
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -27.809000000000125
  episode_reward_min: -36.60000000000025
  episodes_this_iter: 3
  episodes_total: 988
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.7970077566315016e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7292919198671977
          entropy_coeff: 0.009999999999999998
          kl: 0.003187868160882099
          policy_loss: -0.10611244498027696
          total_loss: 0.8871139788793193
          vf_explained_var: 0.41756558418273926
          vf_loss: 1.000519335269928
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained: 301000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,301,7990.96,301000,-27.809,-23.5,-36.6,278.09




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-10-28_23-20-03
  done: false
  episode_len_mean: 276.6
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -27.660000000000124
  episode_reward_min: -36.60000000000025
  episodes_this_iter: 4
  episodes_total: 992
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3985038783157508e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8376157575183445
          entropy_coeff: 0.009999999999999998
          kl: 0.004365869311173437
          policy_loss: 0.036066419548458524
          total_loss: 1.1768165833420223
          vf_explained_var: 0.26326656341552734
          vf_loss: 1.1491263214084837
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 302000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,302,8036.39,302000,-27.66,-20.7,-36.6,276.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-10-28_23-20-28
  done: false
  episode_len_mean: 275.5
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -27.55000000000012
  episode_reward_min: -36.60000000000025
  episodes_this_iter: 4
  episodes_total: 996
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6992519391578754e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9911706911193
          entropy_coeff: 0.009999999999999998
          kl: 0.023524223050361875
          policy_loss: 0.04817215510540539
          total_loss: 1.3510353234079149
          vf_explained_var: 0.23771801590919495
          vf_loss: 1.312774853573905
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained: 303000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,303,8061.97,303000,-27.55,-20.7,-36.6,275.5


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-10-28_23-20-54
  done: false
  episode_len_mean: 274.57
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -27.457000000000118
  episode_reward_min: -36.60000000000025
  episodes_this_iter: 3
  episodes_total: 999
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.548877908736813e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0177517904175652
          entropy_coeff: 0.009999999999999998
          kl: 0.006602734306290535
          policy_loss: 0.005554105010297563
          total_loss: 1.064620752301481
          vf_explained_var: 0.18528111279010773
          vf_loss: 1.0692441701889037
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 304000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,304,8087.69,304000,-27.457,-20.7,-36.6,274.57


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-10-28_23-21-19
  done: false
  episode_len_mean: 273.16
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -27.316000000000116
  episode_reward_min: -36.60000000000025
  episodes_this_iter: 4
  episodes_total: 1003
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.548877908736813e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.067615142133501
          entropy_coeff: 0.009999999999999998
          kl: 0.007993739956356351
          policy_loss: -0.02126780086093479
          total_loss: 1.413363304403093
          vf_explained_var: 0.18568164110183716
          vf_loss: 1.4453072481685214
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 305000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,305,8112.8,305000,-27.316,-20.7,-36.6,273.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-10-28_23-21-46
  done: false
  episode_len_mean: 272.24
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -27.224000000000125
  episode_reward_min: -36.60000000000025
  episodes_this_iter: 3
  episodes_total: 1006
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.548877908736813e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.1119454714987014
          entropy_coeff: 0.009999999999999998
          kl: 0.04577049440194069
          policy_loss: -0.07881821741660436
          total_loss: 2.101885109477573
          vf_explained_var: 0.3240995407104492
          vf_loss: 2.191822768913375
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_trained: 306000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,306,8139.71,306000,-27.224,-20.7,-36.6,272.24


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-10-28_23-22-12
  done: false
  episode_len_mean: 270.65
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -27.06500000000012
  episode_reward_min: -36.60000000000025
  episodes_this_iter: 4
  episodes_total: 1010
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.823316863105221e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9837388985686832
          entropy_coeff: 0.009999999999999998
          kl: 0.004677923090625708
          policy_loss: 0.02659613316257795
          total_loss: 1.2309544245402018
          vf_explained_var: 0.1669641137123108
          vf_loss: 1.214195684591929
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained: 307000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,307,8165.9,307000,-27.065,-20.7,-36.6,270.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-10-28_23-22-39
  done: false
  episode_len_mean: 269.33
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.93300000000011
  episode_reward_min: -36.60000000000025
  episodes_this_iter: 4
  episodes_total: 1014
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9116584315526104e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9367639270093706
          entropy_coeff: 0.009999999999999998
          kl: 0.006836911845209354
          policy_loss: 0.0021753055767880546
          total_loss: 1.2331731836001079
          vf_explained_var: 0.21528911590576172
          vf_loss: 1.240365524424447
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 308000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,308,8192.42,308000,-26.933,-20.7,-36.6,269.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-10-28_23-23-06
  done: false
  episode_len_mean: 268.17
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.817000000000107
  episode_reward_min: -36.60000000000025
  episodes_this_iter: 3
  episodes_total: 1017
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9116584315526104e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9442388229899936
          entropy_coeff: 0.009999999999999998
          kl: 0.007233355426607662
          policy_loss: -0.0994380539490117
          total_loss: 1.2732535825835334
          vf_explained_var: 0.05236002057790756
          vf_loss: 1.3821340216530693
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained: 309000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,309,8219.31,309000,-26.817,-20.7,-36.6,268.17




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-10-28_23-23-51
  done: false
  episode_len_mean: 267.58
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.758000000000106
  episode_reward_min: -36.60000000000025
  episodes_this_iter: 4
  episodes_total: 1021
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9116584315526104e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9325248029496934
          entropy_coeff: 0.009999999999999998
          kl: 0.004936667466004805
          policy_loss: -0.08263582429952092
          total_loss: 1.267357779873742
          vf_explained_var: 0.27585187554359436
          vf_loss: 1.3593188479542733
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained: 310000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,310,8263.88,310000,-26.758,-20.7,-36.6,267.58


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-10-28_23-24-16
  done: false
  episode_len_mean: 265.67
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.567000000000103
  episode_reward_min: -35.90000000000024
  episodes_this_iter: 4
  episodes_total: 1025
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.558292157763052e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8573262810707092
          entropy_coeff: 0.009999999999999998
          kl: 0.00817092834265966
          policy_loss: 0.030282042920589447
          total_loss: 1.210209118657642
          vf_explained_var: 0.26558929681777954
          vf_loss: 1.188500334156884
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_trained: 311000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,311,8289.64,311000,-26.567,-20.7,-35.9,265.67


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-10-28_23-24-43
  done: false
  episode_len_mean: 265.69
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.569000000000106
  episode_reward_min: -35.90000000000024
  episodes_this_iter: 4
  episodes_total: 1029
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.558292157763052e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8119053602218628
          entropy_coeff: 0.009999999999999998
          kl: 0.005222393588156393
          policy_loss: 0.017870705243613984
          total_loss: 1.4528470171822443
          vf_explained_var: 0.05729848891496658
          vf_loss: 1.4430953754319085
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 312000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,312,8316.01,312000,-26.569,-20.7,-35.9,265.69


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-10-28_23-25-10
  done: false
  episode_len_mean: 264.71
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.471000000000103
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 3
  episodes_total: 1032
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.558292157763052e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.886265919605891
          entropy_coeff: 0.009999999999999998
          kl: 0.002744610580868439
          policy_loss: -0.03504207328789764
          total_loss: 0.9943900042110019
          vf_explained_var: 0.23509356379508972
          vf_loss: 1.038294733232922
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_trained: 313000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,313,8343.35,313000,-26.471,-20.7,-32.7,264.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-10-28_23-25-36
  done: false
  episode_len_mean: 264.28
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.428000000000107
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 1036
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.779146078881526e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8722445799244775
          entropy_coeff: 0.009999999999999998
          kl: 0.0068692972337417915
          policy_loss: -0.017963355862432057
          total_loss: 1.1299392521381377
          vf_explained_var: 0.4807773530483246
          vf_loss: 1.1566250483194986
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained: 314000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,314,8369.49,314000,-26.428,-20.7,-32.7,264.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-10-28_23-26-05
  done: false
  episode_len_mean: 264.05
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.405000000000108
  episode_reward_min: -32.700000000000195
  episodes_this_iter: 4
  episodes_total: 1040
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.779146078881526e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.114786018927892
          entropy_coeff: 0.009999999999999998
          kl: 0.017632853996967037
          policy_loss: 0.028519223795996772
          total_loss: 0.5481981678141488
          vf_explained_var: 0.8327961564064026
          vf_loss: 0.5308268035451571
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_trained: 315000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,315,8398.53,315000,-26.405,-20.7,-32.7,264.05


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-10-28_23-26-31
  done: false
  episode_len_mean: 265.51
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.55100000000011
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 1044
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.779146078881526e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.5165579524305133
          entropy_coeff: 0.009999999999999998
          kl: 0.03401632965359269
          policy_loss: 0.13223861422803668
          total_loss: 0.7847018577365412
          vf_explained_var: 0.8967927694320679
          vf_loss: 0.6676288255386882
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 316000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,316,8423.9,316000,-26.551,-20.7,-38.6,265.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-10-28_23-26-55
  done: false
  episode_len_mean: 267.33
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.73300000000011
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 1047
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.168719118322285e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.5266370329591963
          entropy_coeff: 0.009999999999999998
          kl: 0.01983692746666029
          policy_loss: -0.06743996921512815
          total_loss: 0.33205606755283146
          vf_explained_var: 0.8553017377853394
          vf_loss: 0.414762407541275
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 317000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,317,8448.54,317000,-26.733,-20.7,-38.6,267.33




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-10-28_23-27-40
  done: false
  episode_len_mean: 267.76
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.776000000000113
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 1051
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.168719118322285e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0998369216918946
          entropy_coeff: 0.009999999999999998
          kl: 0.00914267286411518
          policy_loss: 0.023989520718653998
          total_loss: 0.6522486709886127
          vf_explained_var: 0.8239631056785583
          vf_loss: 0.6392575171258714
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained: 318000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,318,8493.34,318000,-26.776,-20.7,-38.6,267.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-10-28_23-28-03
  done: false
  episode_len_mean: 270.04
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -27.00400000000011
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 1054
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.168719118322285e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.7094964490996467
          entropy_coeff: 0.009999999999999998
          kl: 0.028460494943723172
          policy_loss: 0.05345693942573335
          total_loss: 0.7925469842222002
          vf_explained_var: 0.5372903347015381
          vf_loss: 0.7561850127246644
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained: 319000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,319,8516.69,319000,-27.004,-20.7,-38.6,270.04


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-10-28_23-28-33
  done: false
  episode_len_mean: 270.34
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -27.034000000000106
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 1058
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.075307867748343e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7248533699247572
          entropy_coeff: 0.009999999999999998
          kl: 0.00520584461569512
          policy_loss: 0.015075242436594433
          total_loss: 1.2619648847315046
          vf_explained_var: 0.2416144758462906
          vf_loss: 1.254138179620107
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,320,8545.99,320000,-27.034,-20.7,-38.6,270.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-10-28_23-29-02
  done: false
  episode_len_mean: 269.87
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.987000000000112
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 1062
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.075307867748343e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0728640178839366
          entropy_coeff: 0.009999999999999998
          kl: 0.008924266970227437
          policy_loss: 0.030063161667850283
          total_loss: 0.8733194533321592
          vf_explained_var: 0.5189721584320068
          vf_loss: 0.8539849393897586
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained: 321000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,321,8574.87,321000,-26.987,-20.7,-38.6,269.87


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-10-28_23-29-29
  done: false
  episode_len_mean: 269.71
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.971000000000117
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 1066
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.075307867748343e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.4760942882961696
          entropy_coeff: 0.009999999999999998
          kl: 0.021061243173439062
          policy_loss: 0.06928807430797153
          total_loss: 0.8364211734798219
          vf_explained_var: 0.6523592472076416
          vf_loss: 0.7818940467304654
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained: 322000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,322,8602.22,322000,-26.971,-20.7,-38.6,269.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-10-28_23-29-55
  done: false
  episode_len_mean: 269.35
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.93500000000011
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 1070
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6129618016225151e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9127091321680281
          entropy_coeff: 0.009999999999999998
          kl: 0.0072286335569574965
          policy_loss: -0.1683621883392334
          total_loss: 1.0595088620980582
          vf_explained_var: 0.3009355068206787
          vf_loss: 1.236998152732849
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained: 323000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,323,8627.95,323000,-26.935,-20.7,-38.6,269.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-10-28_23-30-23
  done: false
  episode_len_mean: 268.9
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.890000000000114
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 1073
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6129618016225151e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7512257973353068
          entropy_coeff: 0.009999999999999998
          kl: 0.0027478799718106788
          policy_loss: -0.13258704303039445
          total_loss: 1.0787924230098724
          vf_explained_var: 0.27037397027015686
          vf_loss: 1.218891727924347
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained: 324000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,324,8655.78,324000,-26.89,-20.7,-38.6,268.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-10-28_23-30-52
  done: false
  episode_len_mean: 269.21
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.92100000000011
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 1077
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.064809008112576e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.017354233397378
          entropy_coeff: 0.009999999999999998
          kl: 0.008620411446300007
          policy_loss: -0.02501206762260861
          total_loss: 0.6787583463721805
          vf_explained_var: 0.780421793460846
          vf_loss: 0.713943961262703
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained: 325000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,325,8684.67,325000,-26.921,-20.7,-38.6,269.21




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-10-28_23-31-38
  done: false
  episode_len_mean: 267.94
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.79400000000011
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 5
  episodes_total: 1082
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.064809008112576e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0122851596938238
          entropy_coeff: 0.009999999999999998
          kl: 0.005410819126416576
          policy_loss: 0.012156529393461015
          total_loss: 1.1380796485477023
          vf_explained_var: 0.6074368953704834
          vf_loss: 1.1360459920432833
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 326000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,326,8731.01,326000,-26.794,-20.7,-38.6,267.94


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-10-28_23-32-05
  done: false
  episode_len_mean: 267.82
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.78200000000011
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 1085
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.064809008112576e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8220033168792724
          entropy_coeff: 0.009999999999999998
          kl: 0.0057752185124522425
          policy_loss: -0.1356171899371677
          total_loss: 0.6403397894567914
          vf_explained_var: 0.7448851466178894
          vf_loss: 0.7841770019796159
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trained: 327000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,327,8758.45,327000,-26.782,-20.7,-38.6,267.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-10-28_23-32-33
  done: false
  episode_len_mean: 267.9
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.79000000000011
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 1089
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.064809008112576e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.281432729297214
          entropy_coeff: 0.009999999999999998
          kl: 0.012441708879445911
          policy_loss: 0.011255896174245411
          total_loss: 0.7036449650923411
          vf_explained_var: 0.7337953448295593
          vf_loss: 0.705203389790323
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 328000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,328,8785.78,328000,-26.79,-20.7,-38.6,267.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-10-28_23-33-00
  done: false
  episode_len_mean: 267.95
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.795000000000105
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 1093
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.064809008112576e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.276390051841736
          entropy_coeff: 0.009999999999999998
          kl: 0.009912242973149275
          policy_loss: 0.017815605302651725
          total_loss: 0.3537033148937755
          vf_explained_var: 0.8954970240592957
          vf_loss: 0.3486516096525722
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained: 329000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,329,8812.95,329000,-26.795,-21.8,-38.6,267.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-10-28_23-33-26
  done: false
  episode_len_mean: 267.4
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.74000000000011
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 1097
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.064809008112576e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.5456582493252224
          entropy_coeff: 0.009999999999999998
          kl: 0.02796848625657032
          policy_loss: 0.07915739830997255
          total_loss: 0.5868256628513336
          vf_explained_var: 0.7328822612762451
          vf_loss: 0.5231248524453905
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 330000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,330,8838.83,330000,-26.74,-21.8,-38.6,267.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-10-28_23-33-51
  done: false
  episode_len_mean: 267.39
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.73900000000011
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 1100
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2097213512168855e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7252962787946065
          entropy_coeff: 0.009999999999999998
          kl: 0.01230285566822147
          policy_loss: 0.014196443226602342
          total_loss: 0.32750721640057034
          vf_explained_var: 0.8769694566726685
          vf_loss: 0.3305637384454409
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_trained: 331000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,331,8863.74,331000,-26.739,-21.8,-38.6,267.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-10-28_23-34-17
  done: false
  episode_len_mean: 267.53
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.753000000000114
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 1104
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2097213512168855e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.3617583500014412
          entropy_coeff: 0.009999999999999998
          kl: 0.023425703598106334
          policy_loss: -0.020497813696662585
          total_loss: 0.6262738251023823
          vf_explained_var: 0.7889789938926697
          vf_loss: 0.6603892260127597
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_trained: 332000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,332,8889.79,332000,-26.753,-21.8,-38.6,267.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-10-28_23-34-42
  done: false
  episode_len_mean: 267.42
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.74200000000011
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 1107
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8145820268253287e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.3245945572853088
          entropy_coeff: 0.009999999999999998
          kl: 0.02189002719417227
          policy_loss: -0.09208833707703484
          total_loss: 0.9641387535466088
          vf_explained_var: 0.5572690367698669
          vf_loss: 1.0694730394416385
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trained: 333000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,333,8914.99,333000,-26.742,-21.8,-38.6,267.42




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-10-28_23-35-25
  done: false
  episode_len_mean: 268.52
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.85200000000011
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 1111
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7218730402379945e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.108194777700636
          entropy_coeff: 0.009999999999999998
          kl: 0.016816873604239193
          policy_loss: 0.04701440077688959
          total_loss: 0.8228327320681678
          vf_explained_var: 0.6490895748138428
          vf_loss: 0.7969002736939325
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trained: 334000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,334,8957.48,334000,-26.852,-21.8,-38.6,268.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-10-28_23-35-51
  done: false
  episode_len_mean: 268.35
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.835000000000107
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 1114
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7218730402379945e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0260800037119124
          entropy_coeff: 0.009999999999999998
          kl: 0.01804455441438052
          policy_loss: -0.10763764464192921
          total_loss: 0.9954198333952162
          vf_explained_var: 0.47195425629615784
          vf_loss: 1.1133182770676082
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_trained: 335000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,335,8983.36,335000,-26.835,-21.8,-38.6,268.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-10-28_23-36-14
  done: false
  episode_len_mean: 269.89
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -26.989000000000114
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 1117
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7218730402379945e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.613587901327345
          entropy_coeff: 0.009999999999999998
          kl: 0.019312257024559347
          policy_loss: -0.06761714542905489
          total_loss: 0.9397573093573253
          vf_explained_var: 0.4620310068130493
          vf_loss: 1.0235103481345706
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 336000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,336,9007.18,336000,-26.989,-21.8,-38.6,269.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-10-28_23-36-38
  done: false
  episode_len_mean: 272.12
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.212000000000117
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 1121
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7218730402379945e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.592336419555876
          entropy_coeff: 0.009999999999999998
          kl: 0.07551546820491496
          policy_loss: -0.02931900943319003
          total_loss: 0.9476503882143232
          vf_explained_var: 0.3830103874206543
          vf_loss: 0.9928927646742927
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_trained: 337000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,337,9030.38,337000,-27.212,-21.8,-38.6,272.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-10-28_23-37-00
  done: false
  episode_len_mean: 273.67
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.36700000000012
  episode_reward_min: -39.50000000000029
  episodes_this_iter: 3
  episodes_total: 1124
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.082809560356988e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6898856149779427
          entropy_coeff: 0.009999999999999998
          kl: 0.01899517831630146
          policy_loss: 0.051092586417992907
          total_loss: 0.8632692588700188
          vf_explained_var: 0.22427107393741608
          vf_loss: 0.8290755316201184
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained: 338000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,338,9053.27,338000,-27.367,-21.8,-39.5,273.67


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-10-28_23-37-24
  done: false
  episode_len_mean: 274.73
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.47300000000012
  episode_reward_min: -39.700000000000294
  episodes_this_iter: 3
  episodes_total: 1127
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.082809560356988e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6314507709609138
          entropy_coeff: 0.009999999999999998
          kl: 0.02895686518540891
          policy_loss: 0.07418434644738833
          total_loss: 0.9099386930465698
          vf_explained_var: 0.17204801738262177
          vf_loss: 0.8520688566275769
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_trained: 339000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,339,9076.7,339000,-27.473,-21.8,-39.7,274.73


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-10-28_23-37-44
  done: false
  episode_len_mean: 276.67
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.667000000000122
  episode_reward_min: -39.700000000000294
  episodes_this_iter: 2
  episodes_total: 1129
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.124214340535484e-09
          cur_lr: 5.000000000000001e-05
          entropy: 2.1792441421084936
          entropy_coeff: 0.009999999999999998
          kl: 0.025857822703435614
          policy_loss: -0.0945021872719129
          total_loss: 0.7051930690805117
          vf_explained_var: 0.26766976714134216
          vf_loss: 0.8214876958065562
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,340,9096.67,340000,-27.667,-21.8,-39.7,276.67


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-10-28_23-38-13
  done: false
  episode_len_mean: 277.19
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.71900000000013
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 4
  episodes_total: 1133
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.186321510803229e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8111028621594111
          entropy_coeff: 0.009999999999999998
          kl: 0.09438484851761442
          policy_loss: -0.11388278678059578
          total_loss: 1.012855585416158
          vf_explained_var: 0.43918687105178833
          vf_loss: 1.1348493960168626
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trained: 341000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,341,9126.05,341000,-27.719,-21.8,-40.1,277.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-10-28_23-38-40
  done: false
  episode_len_mean: 277.48
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.748000000000125
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 4
  episodes_total: 1137
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3779482266204835e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6276634713013967
          entropy_coeff: 0.009999999999999998
          kl: 0.017524805335132104
          policy_loss: 0.002824491345220142
          total_loss: 0.8619399925072988
          vf_explained_var: 0.193480983376503
          vf_loss: 0.8753921444217364
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_trained: 342000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,342,9152.49,342000,-27.748,-21.8,-40.1,277.48




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-10-28_23-39-24
  done: false
  episode_len_mean: 277.2
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.720000000000123
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 4
  episodes_total: 1141
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3779482266204835e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9117805606789059
          entropy_coeff: 0.009999999999999998
          kl: 0.010405305773484643
          policy_loss: -0.0840517041583856
          total_loss: 0.9039035730891758
          vf_explained_var: 0.06734784692525864
          vf_loss: 0.9970730847782558
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 343000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,343,9197.13,343000,-27.72,-21.8,-40.1,277.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-10-28_23-39-54
  done: false
  episode_len_mean: 275.44
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.544000000000125
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 4
  episodes_total: 1145
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3779482266204835e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6816574093368318
          entropy_coeff: 0.009999999999999998
          kl: 0.017633300466131947
          policy_loss: -0.08882048519121276
          total_loss: 0.7396091262499491
          vf_explained_var: 0.4183597266674042
          vf_loss: 0.8352461847994063
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 344000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,344,9226.45,344000,-27.544,-21.8,-40.1,275.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-10-28_23-40-24
  done: false
  episode_len_mean: 273.12
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -27.312000000000122
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 5
  episodes_total: 1150
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3779482266204835e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6666538503434923
          entropy_coeff: 0.009999999999999998
          kl: 0.011271145709831861
          policy_loss: -0.008552328331602944
          total_loss: 0.8689986023637983
          vf_explained_var: 0.5039109587669373
          vf_loss: 0.8842174662484064
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained: 345000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,345,9256.75,345000,-27.312,-22.8,-40.1,273.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-10-28_23-40-54
  done: false
  episode_len_mean: 270.33
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -27.03300000000011
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 4
  episodes_total: 1154
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3779482266204835e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8465621564123366
          entropy_coeff: 0.009999999999999998
          kl: 0.021086264802981623
          policy_loss: 0.05588213445411788
          total_loss: 0.7151017526785532
          vf_explained_var: 0.7544182538986206
          vf_loss: 0.6676852421628104
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained: 346000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,346,9286.66,346000,-27.033,-22.8,-40.1,270.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-10-28_23-41-22
  done: false
  episode_len_mean: 270.42
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -27.04200000000011
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 4
  episodes_total: 1158
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0528562174903022
          entropy_coeff: 0.009999999999999998
          kl: 0.008812779628461304
          policy_loss: 0.04982261409362157
          total_loss: 0.4568575192656782
          vf_explained_var: 0.7481338381767273
          vf_loss: 0.4175634688801236
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained: 347000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,347,9314.93,347000,-27.042,-22.8,-40.1,270.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-10-28_23-41-50
  done: false
  episode_len_mean: 271.06
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -27.10600000000011
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 4
  episodes_total: 1162
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0127591523859236
          entropy_coeff: 0.009999999999999998
          kl: 0.005619212558897699
          policy_loss: 0.014464528858661651
          total_loss: 0.6444648305575053
          vf_explained_var: 0.5930985808372498
          vf_loss: 0.6401278873284658
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_trained: 348000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,348,9342.1,348000,-27.106,-22.8,-40.1,271.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-10-28_23-42-17
  done: false
  episode_len_mean: 270.67
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -27.06700000000012
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1165
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.281895203060574
          entropy_coeff: 0.009999999999999998
          kl: 0.010574863232492623
          policy_loss: -0.1079076220591863
          total_loss: 0.2545284561812878
          vf_explained_var: 0.7428356409072876
          vf_loss: 0.3752550267510944
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_trained: 349000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,349,9369.76,349000,-27.067,-22.8,-40.1,270.67


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-10-28_23-42-45
  done: false
  episode_len_mean: 271.67
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -27.16700000000011
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 4
  episodes_total: 1169
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2076879686779447
          entropy_coeff: 0.009999999999999998
          kl: 0.006762298237484366
          policy_loss: -0.04761271135260661
          total_loss: 0.8824638644854228
          vf_explained_var: 0.23665305972099304
          vf_loss: 0.9421534561448627
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 350000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,350,9397.36,350000,-27.167,-22.8,-40.1,271.67




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-10-28_23-43-28
  done: false
  episode_len_mean: 272.27
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -27.227000000000118
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 4
  episodes_total: 1173
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.3715512964460586
          entropy_coeff: 0.009999999999999998
          kl: 0.006659281778661984
          policy_loss: 0.019707721720139185
          total_loss: 1.0310132589605119
          vf_explained_var: 0.29387155175209045
          vf_loss: 1.025021051035987
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 351000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,351,9440.14,351000,-27.227,-22.8,-40.1,272.27


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-10-28_23-43-50
  done: false
  episode_len_mean: 273.82
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -27.382000000000122
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1176
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4243399090237088
          entropy_coeff: 0.009999999999999998
          kl: 0.012303113155138826
          policy_loss: 0.0840243221157127
          total_loss: 0.7374331540531582
          vf_explained_var: 0.4746800363063812
          vf_loss: 0.6676522312064965
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained: 352000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,352,9462.68,352000,-27.382,-22.8,-40.1,273.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-10-28_23-44-12
  done: false
  episode_len_mean: 276.72
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -27.672000000000118
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1179
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.462127559714847
          entropy_coeff: 0.009999999999999998
          kl: 0.009727066447218588
          policy_loss: 0.044621749718983965
          total_loss: 0.8528241634368896
          vf_explained_var: 0.44791093468666077
          vf_loss: 0.8228236919475926
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_trained: 353000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,353,9484.37,353000,-27.672,-22.8,-40.1,276.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-10-28_23-44-33
  done: false
  episode_len_mean: 279.87
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -27.987000000000126
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1182
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.397895222240024
          entropy_coeff: 0.009999999999999998
          kl: 0.012196272659505035
          policy_loss: 0.03602644946012232
          total_loss: 1.1409330646197
          vf_explained_var: 0.16808170080184937
          vf_loss: 1.118885569439994
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 354000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,354,9505.74,354000,-27.987,-22.8,-40.1,279.87


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-10-28_23-44-54
  done: false
  episode_len_mean: 282.02
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.202000000000126
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 2
  episodes_total: 1184
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4056441611713832
          entropy_coeff: 0.009999999999999998
          kl: 0.010753567097946446
          policy_loss: -0.11827107866605123
          total_loss: 0.8807138979434967
          vf_explained_var: -0.04712768271565437
          vf_loss: 1.0130414190391699
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 355000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,355,9526.38,355000,-28.202,-22.8,-40.1,282.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-10-28_23-45-16
  done: false
  episode_len_mean: 284.01
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.401000000000135
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1187
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.286412763595581
          entropy_coeff: 0.009999999999999998
          kl: 0.014675606007563206
          policy_loss: -0.12104275888866849
          total_loss: 1.2745701339509752
          vf_explained_var: 0.1810293048620224
          vf_loss: 1.4084770109918383
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 356000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,356,9548.75,356000,-28.401,-22.8,-40.1,284.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-10-28_23-45-40
  done: false
  episode_len_mean: 286.18
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.618000000000134
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 4
  episodes_total: 1191
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2741409288512335
          entropy_coeff: 0.009999999999999998
          kl: 0.008226731016198792
          policy_loss: -0.01431079829732577
          total_loss: 1.314815526538425
          vf_explained_var: 0.17916318774223328
          vf_loss: 1.3418677224053277
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trained: 357000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,357,9572.81,357000,-28.618,-22.8,-40.1,286.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-10-28_23-46-05
  done: false
  episode_len_mean: 287.32
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.732000000000138
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1194
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.239983860651652
          entropy_coeff: 0.009999999999999998
          kl: 0.006371051950155504
          policy_loss: 0.07444966153966057
          total_loss: 1.1398378392060597
          vf_explained_var: 0.11947628855705261
          vf_loss: 1.0777880186008082
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_trained: 358000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,358,9597.21,358000,-28.732,-22.8,-40.1,287.32


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-10-28_23-46-31
  done: false
  episode_len_mean: 287.54
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.75400000000013
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1197
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1495924949645997
          entropy_coeff: 0.009999999999999998
          kl: 0.005409911683459711
          policy_loss: -0.10368925953904788
          total_loss: 1.4600801110267638
          vf_explained_var: 0.017253722995519638
          vf_loss: 1.5752652976247998
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained: 359000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,359,9623.29,359000,-28.754,-22.8,-40.1,287.54




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-10-28_23-47-11
  done: false
  episode_len_mean: 288.0
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.800000000000136
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 4
  episodes_total: 1201
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1017434146669176
          entropy_coeff: 0.009999999999999998
          kl: 0.013048176988888313
          policy_loss: 0.04240293887754281
          total_loss: 1.3555218074056836
          vf_explained_var: 0.10419037193059921
          vf_loss: 1.3241362909475962
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,360,9663.73,360000,-28.8,-22.8,-40.1,288


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-10-28_23-47-33
  done: false
  episode_len_mean: 289.53
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.95300000000014
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1204
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.280755180782742
          entropy_coeff: 0.009999999999999998
          kl: 0.009472386236425321
          policy_loss: 0.0630828105741077
          total_loss: 1.1884476019276513
          vf_explained_var: 0.097346730530262
          vf_loss: 1.1381723523139953
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trained: 361000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,361,9685.22,361000,-28.953,-22.8,-40.1,289.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-10-28_23-47-54
  done: false
  episode_len_mean: 291.91
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.191000000000148
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1207
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.219133236673143
          entropy_coeff: 0.009999999999999998
          kl: 0.010627431702237781
          policy_loss: 0.0938653783665763
          total_loss: 0.7704410920540492
          vf_explained_var: 0.32947438955307007
          vf_loss: 0.6887670423835516
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_trained: 362000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,362,9706.52,362000,-29.191,-22.8,-40.1,291.91


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-10-28_23-48-17
  done: false
  episode_len_mean: 293.01
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.301000000000148
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1210
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1807444016138713
          entropy_coeff: 0.009999999999999998
          kl: 0.013054315330796366
          policy_loss: 0.034420116990804675
          total_loss: 1.1912860807445314
          vf_explained_var: 0.16550563275814056
          vf_loss: 1.1686733994219038
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trained: 363000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,363,9728.84,363000,-29.301,-22.8,-40.1,293.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-10-28_23-48-39
  done: false
  episode_len_mean: 294.74
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.47400000000015
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1213
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1861860023604498
          entropy_coeff: 0.009999999999999998
          kl: 0.009530826249566676
          policy_loss: 0.037256922407282726
          total_loss: 1.1643868999348745
          vf_explained_var: 0.26954320073127747
          vf_loss: 1.1389918303324116
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_trained: 364000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,364,9751.12,364000,-29.474,-22.8,-40.1,294.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-10-28_23-49-03
  done: false
  episode_len_mean: 294.93
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.49300000000015
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1216
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2217316071192423
          entropy_coeff: 0.009999999999999998
          kl: 0.010872059566890534
          policy_loss: 0.037236981011099284
          total_loss: 1.3942821413278579
          vf_explained_var: 0.1945013701915741
          vf_loss: 1.3692624875240855
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained: 365000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,365,9774.86,365000,-29.493,-22.8,-40.1,294.93


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-10-28_23-49-25
  done: false
  episode_len_mean: 295.99
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.599000000000146
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1219
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2476831860012478
          entropy_coeff: 0.009999999999999998
          kl: 0.008610580807906684
          policy_loss: 0.05750615662998623
          total_loss: 1.214572396212154
          vf_explained_var: 0.1992649883031845
          vf_loss: 1.1695430600808727
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_trained: 366000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,366,9797.46,366000,-29.599,-22.8,-40.1,295.99


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-10-28_23-49-49
  done: false
  episode_len_mean: 294.48
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.448000000000143
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1222
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1433417916297912
          entropy_coeff: 0.009999999999999998
          kl: 0.008158770508851236
          policy_loss: 0.016150422559844124
          total_loss: 1.2877447499169243
          vf_explained_var: -0.04007551446557045
          vf_loss: 1.2830277274052302
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trained: 367000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,367,9821.05,367000,-29.448,-22.8,-40.1,294.48


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-10-28_23-50-13
  done: false
  episode_len_mean: 293.95
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.395000000000145
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 3
  episodes_total: 1225
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0669223399307266e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9931963509983487
          entropy_coeff: 0.009999999999999998
          kl: 0.021470709742780317
          policy_loss: -0.12272004998392529
          total_loss: 1.5375056544939676
          vf_explained_var: 0.13790616393089294
          vf_loss: 1.670157665676541
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 368000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,368,9844.99,368000,-29.395,-22.8,-40.1,293.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-10-28_23-50-38
  done: false
  episode_len_mean: 292.39
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.239000000000146
  episode_reward_min: -40.1000000000003
  episodes_this_iter: 4
  episodes_total: 1229
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.10038350989609e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.995483648777008
          entropy_coeff: 0.009999999999999998
          kl: 0.0022875788677039605
          policy_loss: 0.041659018438723355
          total_loss: 1.5823562648561267
          vf_explained_var: 0.11387451738119125
          vf_loss: 1.5506520748138428
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_trained: 369000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,369,9870.2,369000,-29.239,-22.8,-40.1,292.39




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-10-28_23-51-22
  done: false
  episode_len_mean: 291.96
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.196000000000144
  episode_reward_min: -39.800000000000296
  episodes_this_iter: 4
  episodes_total: 1233
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.550191754948045e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9119282060199314
          entropy_coeff: 0.009999999999999998
          kl: 0.005233298578928587
          policy_loss: 0.032405433886581
          total_loss: 1.6743569996621874
          vf_explained_var: -0.021192675456404686
          vf_loss: 1.6510708411534627
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_trained: 370000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,370,9914.49,370000,-29.196,-22.8,-39.8,291.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-10-28_23-51-48
  done: false
  episode_len_mean: 292.63
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.263000000000147
  episode_reward_min: -39.800000000000296
  episodes_this_iter: 3
  episodes_total: 1236
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.550191754948045e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9262414826287164
          entropy_coeff: 0.009999999999999998
          kl: 0.003958696911552511
          policy_loss: -0.046892304056220586
          total_loss: 1.370915404955546
          vf_explained_var: 0.003089515957981348
          vf_loss: 1.4270701083872053
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trained: 371000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,371,9939.94,371000,-29.263,-22.8,-39.8,292.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-10-28_23-52-12
  done: false
  episode_len_mean: 293.7
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.370000000000147
  episode_reward_min: -39.800000000000296
  episodes_this_iter: 4
  episodes_total: 1240
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.750958774740225e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8653750399748484
          entropy_coeff: 0.009999999999999998
          kl: 0.0024515965483713686
          policy_loss: 0.03338778701921304
          total_loss: 1.6515182203716703
          vf_explained_var: 0.019093606621026993
          vf_loss: 1.6267841842439439
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_trained: 372000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,372,9964.16,372000,-29.37,-22.8,-39.8,293.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-10-28_23-52-38
  done: false
  episode_len_mean: 294.46
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.446000000000154
  episode_reward_min: -39.800000000000296
  episodes_this_iter: 3
  episodes_total: 1243
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.875479387370112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9417862123913235
          entropy_coeff: 0.009999999999999998
          kl: 0.005623766352914651
          policy_loss: -0.09725310893522368
          total_loss: 1.441112376583947
          vf_explained_var: 0.14390969276428223
          vf_loss: 1.5477833482954237
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_trained: 373000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,373,9990.27,373000,-29.446,-22.8,-39.8,294.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-10-28_23-53-04
  done: false
  episode_len_mean: 296.13
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.613000000000156
  episode_reward_min: -39.800000000000296
  episodes_this_iter: 4
  episodes_total: 1247
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.875479387370112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9320585760805342
          entropy_coeff: 0.009999999999999998
          kl: 0.0059485768180695215
          policy_loss: 0.008814432223637899
          total_loss: 1.6377762330902947
          vf_explained_var: 0.04251507669687271
          vf_loss: 1.638282385137346
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained: 374000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,374,10016.2,374000,-29.613,-22.8,-39.8,296.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-10-28_23-53-29
  done: false
  episode_len_mean: 297.92
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -29.792000000000154
  episode_reward_min: -39.800000000000296
  episodes_this_iter: 3
  episodes_total: 1250
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.875479387370112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9190812779797448
          entropy_coeff: 0.009999999999999998
          kl: 0.006926457199086163
          policy_loss: -0.10137474917703204
          total_loss: 1.5067136340671115
          vf_explained_var: -0.02123648300766945
          vf_loss: 1.6172792050573561
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_trained: 375000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,375,10041.1,375000,-29.792,-22.9,-39.8,297.92


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-10-28_23-53-54
  done: false
  episode_len_mean: 300.05
  episode_media: {}
  episode_reward_max: -23.600000000000065
  episode_reward_mean: -30.00500000000016
  episode_reward_min: -39.800000000000296
  episodes_this_iter: 4
  episodes_total: 1254
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.875479387370112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8230281432469686
          entropy_coeff: 0.009999999999999998
          kl: 0.0089056128320895
          policy_loss: 0.020439894207649762
          total_loss: 1.577539732721117
          vf_explained_var: 0.03590967506170273
          vf_loss: 1.5653301265504624
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained: 376000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,376,10065.8,376000,-30.005,-23.6,-39.8,300.05


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-10-28_23-54-20
  done: false
  episode_len_mean: 300.86
  episode_media: {}
  episode_reward_max: -23.600000000000065
  episode_reward_mean: -30.08600000000016
  episode_reward_min: -39.800000000000296
  episodes_this_iter: 3
  episodes_total: 1257
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.875479387370112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8211744162771437
          entropy_coeff: 0.009999999999999998
          kl: 0.005101535323449724
          policy_loss: -0.09510775779684384
          total_loss: 1.4136201567120021
          vf_explained_var: 0.09572260826826096
          vf_loss: 1.5169396771325006
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained: 377000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,377,10092.1,377000,-30.086,-23.6,-39.8,300.86




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-10-28_23-55-05
  done: false
  episode_len_mean: 300.81
  episode_media: {}
  episode_reward_max: -22.60000000000005
  episode_reward_mean: -30.081000000000152
  episode_reward_min: -39.800000000000296
  episodes_this_iter: 4
  episodes_total: 1261
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.875479387370112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7222400671905942
          entropy_coeff: 0.009999999999999998
          kl: 0.005760415417199264
          policy_loss: -0.04152955661217372
          total_loss: 1.5276249316003587
          vf_explained_var: 0.07739020138978958
          vf_loss: 1.5763768908050326
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 378000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,378,10137,378000,-30.081,-22.6,-39.8,300.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-10-28_23-55-33
  done: false
  episode_len_mean: 300.69
  episode_media: {}
  episode_reward_max: -22.60000000000005
  episode_reward_mean: -30.069000000000155
  episode_reward_min: -39.800000000000296
  episodes_this_iter: 4
  episodes_total: 1265
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.875479387370112e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7221685906251272
          entropy_coeff: 0.009999999999999998
          kl: 0.004038469464375854
          policy_loss: -0.02969740397400326
          total_loss: 1.3948921468522815
          vf_explained_var: 0.12300501018762589
          vf_loss: 1.4318112439579433
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_trained: 379000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,379,10165,379000,-30.069,-22.6,-39.8,300.69


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-10-28_23-56-01
  done: false
  episode_len_mean: 300.3
  episode_media: {}
  episode_reward_max: -22.60000000000005
  episode_reward_mean: -30.030000000000154
  episode_reward_min: -39.800000000000296
  episodes_this_iter: 4
  episodes_total: 1269
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.937739693685056e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7991076449553172
          entropy_coeff: 0.009999999999999998
          kl: 0.007741047921003701
          policy_loss: 0.009134238792790307
          total_loss: 0.7441281873318885
          vf_explained_var: 0.6492297053337097
          vf_loss: 0.742985011306074
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_trained: 380000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,380,10192.6,380000,-30.03,-22.6,-39.8,300.3


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-10-28_23-56-26
  done: false
  episode_len_mean: 299.44
  episode_media: {}
  episode_reward_max: -22.60000000000005
  episode_reward_mean: -29.944000000000155
  episode_reward_min: -39.800000000000296
  episodes_this_iter: 4
  episodes_total: 1273
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.937739693685056e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9303496877352396
          entropy_coeff: 0.009999999999999998
          kl: 0.018327339511432526
          policy_loss: 0.008868951184882057
          total_loss: 1.6102629939715067
          vf_explained_var: 0.39397698640823364
          vf_loss: 1.6106975462701585
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_trained: 381000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,381,10217.7,381000,-29.944,-22.6,-39.8,299.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-10-28_23-56-52
  done: false
  episode_len_mean: 297.43
  episode_media: {}
  episode_reward_max: -22.60000000000005
  episode_reward_mean: -29.743000000000148
  episode_reward_min: -39.800000000000296
  episodes_this_iter: 4
  episodes_total: 1277
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.937739693685056e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0133350604110294
          entropy_coeff: 0.009999999999999998
          kl: 0.009813667185626053
          policy_loss: 0.007367325118846363
          total_loss: 1.1207746843496957
          vf_explained_var: 0.47096189856529236
          vf_loss: 1.1235407147142622
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_trained: 382000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,382,10244.3,382000,-29.743,-22.6,-39.8,297.43


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-10-28_23-57-18
  done: false
  episode_len_mean: 296.15
  episode_media: {}
  episode_reward_max: -22.60000000000005
  episode_reward_mean: -29.615000000000155
  episode_reward_min: -39.800000000000296
  episodes_this_iter: 3
  episodes_total: 1280
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.937739693685056e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.98604110678037
          entropy_coeff: 0.009999999999999998
          kl: 0.006189643569287126
          policy_loss: -0.007399236741993162
          total_loss: 0.5809486587842305
          vf_explained_var: 0.511695384979248
          vf_loss: 0.598208311200142
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained: 383000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,383,10269.4,383000,-29.615,-22.6,-39.8,296.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-10-28_23-57-45
  done: false
  episode_len_mean: 292.11
  episode_media: {}
  episode_reward_max: -22.60000000000005
  episode_reward_mean: -29.211000000000155
  episode_reward_min: -35.20000000000023
  episodes_this_iter: 4
  episodes_total: 1284
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.937739693685056e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8470658699671427
          entropy_coeff: 0.009999999999999998
          kl: 0.004204260571835741
          policy_loss: 0.034065028445588216
          total_loss: 0.7166336334413952
          vf_explained_var: 0.6913686394691467
          vf_loss: 0.6910392711559932
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 384000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,384,10296.8,384000,-29.211,-22.6,-35.2,292.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-10-28_23-58-12
  done: false
  episode_len_mean: 289.17
  episode_media: {}
  episode_reward_max: -22.60000000000005
  episode_reward_mean: -28.91700000000014
  episode_reward_min: -35.20000000000023
  episodes_this_iter: 4
  episodes_total: 1288
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.68869846842528e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8419170578320821
          entropy_coeff: 0.009999999999999998
          kl: 0.009282442505084395
          policy_loss: -0.03681081665886773
          total_loss: 1.418275041050381
          vf_explained_var: 0.13728538155555725
          vf_loss: 1.4635050243801542
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 385000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,385,10324.2,385000,-28.917,-22.6,-35.2,289.17




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-10-28_23-58-58
  done: false
  episode_len_mean: 286.79
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -28.679000000000148
  episode_reward_min: -35.20000000000023
  episodes_this_iter: 4
  episodes_total: 1292
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.68869846842528e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9289936469660864
          entropy_coeff: 0.009999999999999998
          kl: 0.0027357423924389422
          policy_loss: 0.03748999453253216
          total_loss: 1.014686002333959
          vf_explained_var: 0.3888319730758667
          vf_loss: 0.9864859466751417
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_trained: 386000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,386,10370.1,386000,-28.679,-21.9,-35.2,286.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-10-28_23-59-27
  done: false
  episode_len_mean: 285.29
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -28.52900000000013
  episode_reward_min: -35.20000000000023
  episodes_this_iter: 4
  episodes_total: 1296
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.84434923421264e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7957843502362569
          entropy_coeff: 0.009999999999999998
          kl: 0.01153515115140963
          policy_loss: 0.006985279255443149
          total_loss: 1.1640259948041705
          vf_explained_var: 0.40890470147132874
          vf_loss: 1.1649985591570535
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained: 387000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,387,10398.2,387000,-28.529,-21.9,-35.2,285.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-10-28_23-59-54
  done: false
  episode_len_mean: 284.61
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -28.461000000000137
  episode_reward_min: -35.20000000000023
  episodes_this_iter: 3
  episodes_total: 1299
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.84434923421264e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8235076818201277
          entropy_coeff: 0.009999999999999998
          kl: 0.014436257111315775
          policy_loss: -0.021910121871365443
          total_loss: 1.117863086859385
          vf_explained_var: 0.18293461203575134
          vf_loss: 1.1480082809925078
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_trained: 388000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,388,10425.3,388000,-28.461,-21.9,-35.2,284.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-10-29_00-00-23
  done: false
  episode_len_mean: 282.11
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -28.211000000000126
  episode_reward_min: -35.20000000000023
  episodes_this_iter: 4
  episodes_total: 1303
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.84434923421264e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.784406946765052
          entropy_coeff: 0.009999999999999998
          kl: 0.006276696580822892
          policy_loss: 0.0022244599958260855
          total_loss: 0.9714254018333223
          vf_explained_var: 0.5353885293006897
          vf_loss: 0.9770450055599212
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_trained: 389000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,389,10454.5,389000,-28.211,-21.9,-35.2,282.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-10-29_00-00-50
  done: false
  episode_len_mean: 278.37
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -27.83700000000012
  episode_reward_min: -35.20000000000023
  episodes_this_iter: 4
  episodes_total: 1307
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.84434923421264e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7600087066491444
          entropy_coeff: 0.009999999999999998
          kl: 0.007824066574044271
          policy_loss: 0.03345574186080032
          total_loss: 0.6392164073056645
          vf_explained_var: 0.8264443874359131
          vf_loss: 0.6133607468671269
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_trained: 390000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,390,10481.2,390000,-27.837,-21.9,-35.2,278.37


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-10-29_00-01-18
  done: false
  episode_len_mean: 274.62
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -27.462000000000117
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 1311
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.84434923421264e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7616722272502051
          entropy_coeff: 0.009999999999999998
          kl: 0.0045590829275890124
          policy_loss: 0.0256942024661435
          total_loss: 0.6037846048672993
          vf_explained_var: 0.7910254001617432
          vf_loss: 0.5857071257299847
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_trained: 391000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,391,10509.6,391000,-27.462,-21.9,-34,274.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-10-29_00-01-46
  done: false
  episode_len_mean: 272.21
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -27.221000000000117
  episode_reward_min: -34.00000000000021
  episodes_this_iter: 4
  episodes_total: 1315
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.42217461710632e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6990677681234148
          entropy_coeff: 0.009999999999999998
          kl: 0.0030070812894336402
          policy_loss: -0.026701780491405064
          total_loss: 0.8117907239331139
          vf_explained_var: 0.7639699578285217
          vf_loss: 0.8454831909802225
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_trained: 392000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,392,10537.4,392000,-27.221,-21.9,-34,272.21


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-10-29_00-02-13
  done: false
  episode_len_mean: 269.87
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.987000000000116
  episode_reward_min: -32.60000000000019
  episodes_this_iter: 4
  episodes_total: 1319
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.21108730855316e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6751656028959486
          entropy_coeff: 0.009999999999999998
          kl: 0.0023215429017204416
          policy_loss: 0.07236572644776768
          total_loss: 0.9434543761942121
          vf_explained_var: 0.7357808351516724
          vf_loss: 0.8778403010633257
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_trained: 393000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,393,10564.5,393000,-26.987,-21.9,-32.6,269.87




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-10-29_00-02-58
  done: false
  episode_len_mean: 267.39
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.739000000000114
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1323
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.0554365427658e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7169483635160658
          entropy_coeff: 0.009999999999999998
          kl: 0.0051501195482964845
          policy_loss: 0.08561958571275076
          total_loss: 0.7989285614755418
          vf_explained_var: 0.6462869644165039
          vf_loss: 0.720478457874722
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trained: 394000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,394,10610,394000,-26.739,-21.9,-31.4,267.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-10-29_00-03-26
  done: false
  episode_len_mean: 265.82
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.582000000000107
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1327
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.0554365427658e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7215021153291067
          entropy_coeff: 0.009999999999999998
          kl: 0.00479102524316725
          policy_loss: 0.006202574488189485
          total_loss: 0.9731268571482764
          vf_explained_var: 0.4492015838623047
          vf_loss: 0.9741393122408125
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_trained: 395000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,395,10637.7,395000,-26.582,-21.9,-31.4,265.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-10-29_00-03-53
  done: false
  episode_len_mean: 265.41
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.541000000000118
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1331
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0277182713829e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6748412582609389
          entropy_coeff: 0.009999999999999998
          kl: 0.0076639188641621555
          policy_loss: 0.10697023479474915
          total_loss: 0.6920659868253602
          vf_explained_var: 0.8053392171859741
          vf_loss: 0.5918441639178329
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 396000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,396,10664.9,396000,-26.541,-21.9,-31.4,265.41


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-10-29_00-04-22
  done: false
  episode_len_mean: 264.47
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.447000000000113
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1335
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0277182713829e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7669945902294583
          entropy_coeff: 0.009999999999999998
          kl: 0.01941554873233476
          policy_loss: -0.015876170330577425
          total_loss: 0.6782298896047804
          vf_explained_var: 0.7010889649391174
          vf_loss: 0.7017760058244069
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_trained: 397000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,397,10693.6,397000,-26.447,-21.9,-31.4,264.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-10-29_00-04-50
  done: false
  episode_len_mean: 263.51
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.351000000000102
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1339
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0277182713829e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7305825816260444
          entropy_coeff: 0.009999999999999998
          kl: 0.00633134637876438
          policy_loss: -0.055493367753095095
          total_loss: 1.0414959854549832
          vf_explained_var: 0.3666892945766449
          vf_loss: 1.1042951762676239
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trained: 398000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,398,10721.8,398000,-26.351,-21.9,-31.4,263.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-10-29_00-05-15
  done: false
  episode_len_mean: 263.01
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.301000000000105
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 3
  episodes_total: 1342
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0277182713829e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.9335976309246488
          entropy_coeff: 0.009999999999999998
          kl: 0.007534236845762463
          policy_loss: -0.02930946494970057
          total_loss: 0.8545380671819051
          vf_explained_var: 0.5312279462814331
          vf_loss: 0.8931834929519229
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trained: 399000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,399,10746.7,399000,-26.301,-21.9,-31.4,263.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-10-29_00-05-42
  done: false
  episode_len_mean: 262.73
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.273000000000103
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1346
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0277182713829e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.9946007344457838
          entropy_coeff: 0.009999999999999998
          kl: 0.011645521598701416
          policy_loss: -0.0013044148683547974
          total_loss: 0.9180277407169342
          vf_explained_var: 0.4027722179889679
          vf_loss: 0.9292781612939305
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trained: 400000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,400,10773.2,400000,-26.273,-21.9,-31.4,262.73


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-10-29_00-06-07
  done: false
  episode_len_mean: 263.01
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.30100000000011
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 3
  episodes_total: 1349
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0277182713829e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.999248124493493
          entropy_coeff: 0.009999999999999998
          kl: 0.016919515115330973
          policy_loss: -0.07916441907485326
          total_loss: 1.232088765833113
          vf_explained_var: 0.14764343202114105
          vf_loss: 1.32124566104677
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained: 401000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,401,10798.6,401000,-26.301,-21.9,-31.4,263.01




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-10-29_00-06-51
  done: false
  episode_len_mean: 262.09
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.209000000000103
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1353
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0277182713829e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8804183158609602
          entropy_coeff: 0.009999999999999998
          kl: 0.010264872681474755
          policy_loss: 0.0167498047153155
          total_loss: 0.953982275724411
          vf_explained_var: 0.28443896770477295
          vf_loss: 0.946036649412579
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trained: 402000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,402,10842.6,402000,-26.209,-21.9,-31.4,262.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-10-29_00-07-18
  done: false
  episode_len_mean: 261.8
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.1800000000001
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1357
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0277182713829e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.917293832037184
          entropy_coeff: 0.009999999999999998
          kl: 0.007295799244109149
          policy_loss: 0.02288335429297553
          total_loss: 1.2771205173598394
          vf_explained_var: 0.18144270777702332
          vf_loss: 1.2634101145797305
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained: 403000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,403,10869.3,403000,-26.18,-21.9,-31.4,261.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-10-29_00-07-45
  done: false
  episode_len_mean: 262.07
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.207000000000097
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 3
  episodes_total: 1360
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0277182713829e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7854533447159662
          entropy_coeff: 0.009999999999999998
          kl: 0.020685809709417195
          policy_loss: -0.1040243117345704
          total_loss: 1.0257630990611182
          vf_explained_var: 0.3032021224498749
          vf_loss: 1.137641947136985
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_trained: 404000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,404,10896.4,404000,-26.207,-21.9,-31.4,262.07


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-10-29_00-08-11
  done: false
  episode_len_mean: 262.85
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.285000000000103
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1364
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5415774070743484e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7897845188776652
          entropy_coeff: 0.009999999999999998
          kl: 0.0034499494110176265
          policy_loss: -0.005184474090735118
          total_loss: 1.488767725891537
          vf_explained_var: 0.06773841381072998
          vf_loss: 1.5018500341309442
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,405,10922.8,405000,-26.285,-21.9,-31.4,262.85


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-10-29_00-08-39
  done: false
  episode_len_mean: 263.08
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.30800000000011
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1368
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2707887035371742e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7486494236522251
          entropy_coeff: 0.009999999999999998
          kl: 0.0041611396495296465
          policy_loss: 0.0037298944261338974
          total_loss: 1.480382920636071
          vf_explained_var: 0.01765436679124832
          vf_loss: 1.4841395219167073
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 406000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,406,10950.5,406000,-26.308,-21.9,-31.4,263.08


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-10-29_00-09-06
  done: false
  episode_len_mean: 263.65
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.365000000000105
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1372
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1353943517685871e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6691920306947496
          entropy_coeff: 0.009999999999999998
          kl: 0.009705584440345265
          policy_loss: 0.014097708670629396
          total_loss: 1.2210571295685237
          vf_explained_var: 0.22784513235092163
          vf_loss: 1.2136513411998748
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 407000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,407,10976.9,407000,-26.365,-21.9,-31.4,263.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-10-29_00-09-33
  done: false
  episode_len_mean: 262.59
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.2590000000001
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 1376
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1353943517685871e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5953894015815523
          entropy_coeff: 0.009999999999999998
          kl: 0.013087591698944854
          policy_loss: 0.019951782127221426
          total_loss: 1.0326864580313364
          vf_explained_var: 0.5963907241821289
          vf_loss: 1.0186885747644636
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 408000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,408,11004.2,408000,-26.259,-21.9,-31.4,262.59




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-10-29_00-10-17
  done: false
  episode_len_mean: 261.19
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.119000000000106
  episode_reward_min: -31.200000000000173
  episodes_this_iter: 4
  episodes_total: 1380
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1353943517685871e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6307801306247711
          entropy_coeff: 0.009999999999999998
          kl: 0.004567501112066097
          policy_loss: 0.07067944341235691
          total_loss: 0.7805421703391605
          vf_explained_var: 0.5507524013519287
          vf_loss: 0.7161705278688006
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 409000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,409,11048.3,409000,-26.119,-21.9,-31.2,261.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-10-29_00-10-42
  done: false
  episode_len_mean: 263.15
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.3150000000001
  episode_reward_min: -40.400000000000304
  episodes_this_iter: 3
  episodes_total: 1383
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.6769717588429356e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.867875744899114
          entropy_coeff: 0.009999999999999998
          kl: 0.005878354580903286
          policy_loss: 0.013833253085613251
          total_loss: 1.252687653568056
          vf_explained_var: 0.3387749195098877
          vf_loss: 1.2475331470370292
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 410000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,410,11072.9,410000,-26.315,-21.9,-40.4,263.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-10-29_00-11-03
  done: false
  episode_len_mean: 264.42
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.442000000000107
  episode_reward_min: -40.400000000000304
  episodes_this_iter: 2
  episodes_total: 1385
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.6769717588429356e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.9103063344955444
          entropy_coeff: 0.009999999999999998
          kl: 0.01201069315266882
          policy_loss: -0.07324820011854172
          total_loss: 1.1443515671624078
          vf_explained_var: 0.25189879536628723
          vf_loss: 1.226702836404244
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 411000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,411,11094,411000,-26.442,-21.9,-40.4,264.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-10-29_00-11-21
  done: false
  episode_len_mean: 269.26
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.926000000000116
  episode_reward_min: -46.9000000000004
  episodes_this_iter: 3
  episodes_total: 1388
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.6769717588429356e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.0050942056708865
          entropy_coeff: 0.009999999999999998
          kl: 0.008742810649587086
          policy_loss: 0.035503242082066004
          total_loss: 1.3698514560858408
          vf_explained_var: 0.06028174236416817
          vf_loss: 1.3443991435898675
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 412000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,412,11111.8,412000,-26.926,-21.9,-46.9,269.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-10-29_00-11-42
  done: false
  episode_len_mean: 272.77
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -27.277000000000122
  episode_reward_min: -46.9000000000004
  episodes_this_iter: 3
  episodes_total: 1391
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.6769717588429356e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.9087408476405674
          entropy_coeff: 0.009999999999999998
          kl: 0.007529506032554936
          policy_loss: 0.05082534294989374
          total_loss: 1.4194976747035981
          vf_explained_var: -0.28150513768196106
          vf_loss: 1.3777597395910157
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,413,11133,413000,-27.277,-22.2,-46.9,272.77


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-10-29_00-12-02
  done: false
  episode_len_mean: 275.83
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -27.58300000000012
  episode_reward_min: -46.9000000000004
  episodes_this_iter: 2
  episodes_total: 1393
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.6769717588429356e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.9958641356892056
          entropy_coeff: 0.009999999999999998
          kl: 0.011439867270871002
          policy_loss: 0.03200050708320406
          total_loss: 0.7601241239243084
          vf_explained_var: 0.056666623800992966
          vf_loss: 0.7380822649691254
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 414000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,414,11153.1,414000,-27.583,-22.2,-46.9,275.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-10-29_00-12-20
  done: false
  episode_len_mean: 279.97
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -27.997000000000135
  episode_reward_min: -48.300000000000416
  episodes_this_iter: 3
  episodes_total: 1396
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.6769717588429356e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.9339867618348864
          entropy_coeff: 0.009999999999999998
          kl: 0.006500498886098585
          policy_loss: 0.04927145921521717
          total_loss: 1.3749373796913358
          vf_explained_var: -0.13038651645183563
          vf_loss: 1.3350057696302733
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 415000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,415,11171.2,415000,-27.997,-22.2,-48.3,279.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-10-29_00-12-39
  done: false
  episode_len_mean: 282.08
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -28.208000000000137
  episode_reward_min: -48.300000000000416
  episodes_this_iter: 2
  episodes_total: 1398
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.6769717588429356e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8796645005544027
          entropy_coeff: 0.009999999999999998
          kl: 0.004756203159868496
          policy_loss: -0.07627934076719814
          total_loss: 1.1988389525148604
          vf_explained_var: 0.2641703188419342
          vf_loss: 1.2839149322774674
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 416000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,416,11190.1,416000,-28.208,-22.2,-48.3,282.08


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-10-29_00-12-57
  done: false
  episode_len_mean: 285.9
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -28.590000000000142
  episode_reward_min: -56.30000000000053
  episodes_this_iter: 2
  episodes_total: 1400
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8384858794214678e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8271142052279579
          entropy_coeff: 0.009999999999999998
          kl: 0.005189650036856048
          policy_loss: -0.06469344306323263
          total_loss: 1.2037708656655417
          vf_explained_var: 0.03374467045068741
          vf_loss: 1.2767354604270724
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_trained: 417000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,417,11207.7,417000,-28.59,-22.2,-56.3,285.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-10-29_00-13-11
  done: false
  episode_len_mean: 292.42
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -29.242000000000143
  episode_reward_min: -58.100000000000556
  episodes_this_iter: 2
  episodes_total: 1402
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8384858794214678e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.967081360022227
          entropy_coeff: 0.009999999999999998
          kl: 0.011610741523715933
          policy_loss: 0.01558662669526206
          total_loss: 0.8583369284868241
          vf_explained_var: -0.3107076585292816
          vf_loss: 0.8524211309022374
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trained: 418000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,418,11222.1,418000,-29.242,-22.2,-58.1,292.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-10-29_00-13-30
  done: false
  episode_len_mean: 295.66
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -29.566000000000148
  episode_reward_min: -58.100000000000556
  episodes_this_iter: 2
  episodes_total: 1404
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8384858794214678e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.9436158372296227
          entropy_coeff: 0.009999999999999998
          kl: 0.017543676499794904
          policy_loss: -0.056955214672618444
          total_loss: 0.9387629138098823
          vf_explained_var: 0.03226755931973457
          vf_loss: 1.0051542894707786
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_trained: 41900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,419,11240.9,419000,-29.566,-22.2,-58.1,295.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-10-29_00-13-47
  done: false
  episode_len_mean: 301.55
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -30.155000000000157
  episode_reward_min: -58.100000000000556
  episodes_this_iter: 2
  episodes_total: 1406
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8384858794214678e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.9872609668307835
          entropy_coeff: 0.009999999999999998
          kl: 0.005081491265012161
          policy_loss: -0.09223923716280195
          total_loss: 1.255896579556995
          vf_explained_var: -0.2534463703632355
          vf_loss: 1.3580084284146627
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_trained: 420000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,420,11258.3,420000,-30.155,-22.2,-58.1,301.55


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-10-29_00-14-00
  done: false
  episode_len_mean: 305.6
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -30.560000000000162
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 2
  episodes_total: 1408
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8384858794214678e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.9275206115510729
          entropy_coeff: 0.009999999999999998
          kl: 0.01759319425399312
          policy_loss: -0.07270166542794969
          total_loss: 0.9694731015298101
          vf_explained_var: -0.26350224018096924
          vf_loss: 1.0514499439133538
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_trained: 421000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,421,11271.4,421000,-30.56,-22.2,-59.2,305.6




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-10-29_00-14-36
  done: false
  episode_len_mean: 311.6
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -31.160000000000174
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1411
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8384858794214678e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.9263850894239214
          entropy_coeff: 0.009999999999999998
          kl: 0.01131342054747443
          policy_loss: -0.027977060443825193
          total_loss: 1.4193497389554977
          vf_explained_var: 0.19560939073562622
          vf_loss: 1.4565906428628497
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained: 422000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,422,11306.7,422000,-31.16,-22.2,-59.2,311.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-10-29_00-14-57
  done: false
  episode_len_mean: 314.36
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -31.436000000000178
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 2
  episodes_total: 1413
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8384858794214678e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.876675464047326
          entropy_coeff: 0.009999999999999998
          kl: 0.005299996981315915
          policy_loss: -0.09410836646954218
          total_loss: 1.2095753216081195
          vf_explained_var: 0.02114623412489891
          vf_loss: 1.3124504493342506
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_trained: 423000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,423,11327.5,423000,-31.436,-22.2,-59.2,314.36


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-10-29_00-15-18
  done: false
  episode_len_mean: 316.96
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -31.696000000000176
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1416
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8384858794214678e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7342930992444356
          entropy_coeff: 0.009999999999999998
          kl: 0.00785168837678797
          policy_loss: -0.10936543312337664
          total_loss: 1.332491871383455
          vf_explained_var: 0.30453163385391235
          vf_loss: 1.4492002288500467
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 424000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,424,11348.7,424000,-31.696,-22.2,-59.2,316.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-10-29_00-15-37
  done: false
  episode_len_mean: 320.35
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -32.03500000000018
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1419
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8384858794214678e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8860311269760132
          entropy_coeff: 0.009999999999999998
          kl: 0.004917322944923165
          policy_loss: 0.04147609588172701
          total_loss: 1.2485341846942901
          vf_explained_var: 0.14945392310619354
          vf_loss: 1.2159183972411685
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_trained: 425000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,425,11368.1,425000,-32.035,-22.2,-59.2,320.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-10-29_00-15-58
  done: false
  episode_len_mean: 322.59
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -32.259000000000185
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 2
  episodes_total: 1421
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4192429397107339e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8717292017406888
          entropy_coeff: 0.009999999999999998
          kl: 0.009074673607540105
          policy_loss: -0.026596240864859687
          total_loss: 0.9937024328443739
          vf_explained_var: 0.25385552644729614
          vf_loss: 1.0290159632762272
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 426000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,426,11389.1,426000,-32.259,-22.2,-59.2,322.59


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-10-29_00-16-20
  done: false
  episode_len_mean: 326.12
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -32.612000000000194
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1424
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4192429397107339e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.9144691771931118
          entropy_coeff: 0.009999999999999998
          kl: 0.009669728302469326
          policy_loss: -0.061382634109920925
          total_loss: 1.3344501144356198
          vf_explained_var: 0.055776745080947876
          vf_loss: 1.40497743818495
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 427000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,427,11410.9,427000,-32.612,-22.2,-59.2,326.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-10-29_00-16-40
  done: false
  episode_len_mean: 328.85
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -32.8850000000002
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1427
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4192429397107339e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8078858660327064
          entropy_coeff: 0.009999999999999998
          kl: 0.04086208039638767
          policy_loss: 0.027794113920794592
          total_loss: 0.8561716548270649
          vf_explained_var: 0.22943541407585144
          vf_loss: 0.8364563912153244
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 428000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,428,11431,428000,-32.885,-22.2,-59.2,328.85


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-10-29_00-17-02
  done: false
  episode_len_mean: 331.87
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -33.1870000000002
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1430
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.128864409566102e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.736009685198466
          entropy_coeff: 0.009999999999999998
          kl: 0.005341901103744028
          policy_loss: 0.03875900920894411
          total_loss: 1.1822697162628173
          vf_explained_var: -0.019342778250575066
          vf_loss: 1.150870801922348
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained: 429000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,429,11453.1,429000,-33.187,-22.2,-59.2,331.87


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-10-29_00-17-28
  done: false
  episode_len_mean: 332.76
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -33.2760000000002
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1433
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.128864409566102e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6496625443299612
          entropy_coeff: 0.009999999999999998
          kl: 0.005816488494241382
          policy_loss: -0.12743381551570362
          total_loss: 0.824947229690022
          vf_explained_var: 0.5009609460830688
          vf_loss: 0.9588776740762922
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 430000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,430,11478.3,430000,-33.276,-22.2,-59.2,332.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-10-29_00-17-52
  done: false
  episode_len_mean: 334.79
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -33.479000000000205
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 4
  episodes_total: 1437
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.128864409566102e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7775975088278453
          entropy_coeff: 0.009999999999999998
          kl: 0.07840953758096872
          policy_loss: 0.01419347185227606
          total_loss: 1.8355219523111979
          vf_explained_var: 0.18014681339263916
          vf_loss: 1.829104443391164
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_trained: 431000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,431,11502.6,431000,-33.479,-22.2,-59.2,334.79




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-10-29_00-18-36
  done: false
  episode_len_mean: 335.28
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -33.528000000000205
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1440
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1932966143491524e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.6255469404988818
          entropy_coeff: 0.009999999999999998
          kl: 0.012641518637604795
          policy_loss: -0.06368490507205328
          total_loss: 1.3118589222431183
          vf_explained_var: 0.4053424298763275
          vf_loss: 1.381799292895529
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_trained: 432000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,432,11546.5,432000,-33.528,-22.2,-59.2,335.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-10-29_00-18-56
  done: false
  episode_len_mean: 337.26
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -33.726000000000205
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1443
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1932966143491524e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8406609535217285
          entropy_coeff: 0.009999999999999998
          kl: 0.023899395123099906
          policy_loss: 0.004887356443537606
          total_loss: 0.9636951701508628
          vf_explained_var: 0.31205302476882935
          vf_loss: 0.9672144075234731
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 433000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,433,11567.1,433000,-33.726,-22.2,-59.2,337.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-10-29_00-19-13
  done: false
  episode_len_mean: 341.07
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -34.107000000000205
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 2
  episodes_total: 1445
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.7899449215237275e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8460556328296661
          entropy_coeff: 0.009999999999999998
          kl: 0.016976075232629898
          policy_loss: -0.07058944006760916
          total_loss: 1.251573618915346
          vf_explained_var: 0.18943333625793457
          vf_loss: 1.330623611435294
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained: 434000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,434,11584.1,434000,-34.107,-22.2,-59.2,341.07


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-10-29_00-19-34
  done: false
  episode_len_mean: 343.48
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -34.34800000000021
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1448
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.7899449215237275e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7765809549225702
          entropy_coeff: 0.009999999999999998
          kl: 0.03832384501038655
          policy_loss: 0.0381364445719454
          total_loss: 1.1353454581565328
          vf_explained_var: -0.03889773041009903
          vf_loss: 1.1049748291468455
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_trained: 435000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,435,11604.2,435000,-34.348,-22.2,-59.2,343.48


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-10-29_00-20-00
  done: false
  episode_len_mean: 344.35
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -34.43500000000022
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 4
  episodes_total: 1452
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.1849173822855925e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7543071978622012
          entropy_coeff: 0.009999999999999998
          kl: 0.01627683725461916
          policy_loss: 0.05552299676669969
          total_loss: 1.5206835667292278
          vf_explained_var: 0.3249310255050659
          vf_loss: 1.472703649600347
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained: 436000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,436,11630.5,436000,-34.435,-22.2,-59.2,344.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-10-29_00-20-29
  done: false
  episode_len_mean: 342.87
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.28700000000022
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 4
  episodes_total: 1456
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.1849173822855925e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.43247646159595915
          entropy_coeff: 0.009999999999999998
          kl: 0.006709285765185769
          policy_loss: 0.07458798206514783
          total_loss: 1.1579109496540494
          vf_explained_var: 0.35377296805381775
          vf_loss: 1.0876477440198262
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 437000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,437,11659.8,437000,-34.287,-21.9,-59.2,342.87


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 438000
  custom_metrics: {}
  date: 2021-10-29_00-20-57
  done: false
  episode_len_mean: 342.72
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.27200000000022
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 4
  episodes_total: 1460
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.1849173822855925e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.5636592533853318
          entropy_coeff: 0.009999999999999998
          kl: 0.01243685773096009
          policy_loss: 0.03427531197667122
          total_loss: 1.46477556626002
          vf_explained_var: 0.34562399983406067
          vf_loss: 1.4361368384626176
    num_agent_steps_sampled: 438000
    num_agent_steps_trained: 438000
    num_steps_sampled: 438000
    num_steps_trained: 438000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,438,11687.8,438000,-34.272,-21.9,-59.2,342.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 439000
  custom_metrics: {}
  date: 2021-10-29_00-21-24
  done: false
  episode_len_mean: 342.17
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.21700000000022
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1463
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.1849173822855925e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.7173869176043405
          entropy_coeff: 0.009999999999999998
          kl: 0.015037955010454596
          policy_loss: -0.03385724706782235
          total_loss: 1.1264842477109698
          vf_explained_var: 0.1996186077594757
          vf_loss: 1.1675153768000504
    num_agent_steps_sampled: 439000
    num_agent_steps_trained: 439000
    num_steps_sampled: 439000
    num_steps_trained: 439000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,439,11714.5,439000,-34.217,-21.9,-59.2,342.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 440000
  custom_metrics: {}
  date: 2021-10-29_00-21-48
  done: false
  episode_len_mean: 343.25
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.32500000000022
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 4
  episodes_total: 1467
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.1849173822855925e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.8311908536487156
          entropy_coeff: 0.009999999999999998
          kl: 0.01610760482273166
          policy_loss: 0.03787026877204577
          total_loss: 1.153369101550844
          vf_explained_var: 0.583750307559967
          vf_loss: 1.1238107376628452
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_steps_sampled: 440000
    num_steps_trained: 440000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,440,11738.7,440000,-34.325,-21.9,-59.2,343.25




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 441000
  custom_metrics: {}
  date: 2021-10-29_00-22-30
  done: false
  episode_len_mean: 343.49
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.34900000000022
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1470
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.1849173822855925e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.0226778772142198
          entropy_coeff: 0.009999999999999998
          kl: 0.034297758996611645
          policy_loss: 0.0015910170558426115
          total_loss: 0.5972647414904916
          vf_explained_var: 0.8140798211097717
          vf_loss: 0.6059005038605796
    num_agent_steps_sampled: 441000
    num_agent_steps_trained: 441000
    num_steps_sampled: 441000
    num_steps_trained: 441000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,441,11781,441000,-34.349,-21.9,-59.2,343.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 442000
  custom_metrics: {}
  date: 2021-10-29_00-22-52
  done: false
  episode_len_mean: 345.4
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.54000000000022
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1473
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0777376073428386e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.2889533287949033
          entropy_coeff: 0.009999999999999998
          kl: 0.029189088946489822
          policy_loss: 0.022626142866081663
          total_loss: 0.7712169640594059
          vf_explained_var: 0.7295207381248474
          vf_loss: 0.7614803525308768
    num_agent_steps_sampled: 442000
    num_agent_steps_trained: 442000
    num_steps_sampled: 442000
    num_steps_trained: 442000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,442,11802.2,442000,-34.54,-21.9,-59.2,345.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 443000
  custom_metrics: {}
  date: 2021-10-29_00-23-15
  done: false
  episode_len_mean: 347.6
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.760000000000225
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 4
  episodes_total: 1477
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.616606411014258e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.3911765694618226
          entropy_coeff: 0.009999999999999998
          kl: 0.01416868329364473
          policy_loss: 0.10917448914713329
          total_loss: 1.03108484811253
          vf_explained_var: 0.5693072080612183
          vf_loss: 0.9358221219645606
    num_agent_steps_sampled: 443000
    num_agent_steps_trained: 443000
    num_steps_sampled: 443000
    num_steps_trained: 443000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,443,11825.7,443000,-34.76,-21.9,-59.2,347.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 444000
  custom_metrics: {}
  date: 2021-10-29_00-23-38
  done: false
  episode_len_mean: 349.72
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.97200000000023
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1480
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.616606411014258e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.5024021135436163
          entropy_coeff: 0.009999999999999998
          kl: 0.014557933926839794
          policy_loss: -0.02965617229541143
          total_loss: 0.45865778128306073
          vf_explained_var: 0.8142526149749756
          vf_loss: 0.5033379765848319
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_steps_sampled: 444000
    num_steps_trained: 444000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,444,11848.2,444000,-34.972,-21.9,-59.2,349.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 445000
  custom_metrics: {}
  date: 2021-10-29_00-24-04
  done: false
  episode_len_mean: 347.96
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.79600000000022
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1483
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.616606411014258e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5982913500732846
          entropy_coeff: 0.009999999999999998
          kl: 0.012855427391532242
          policy_loss: -0.11113131990035376
          total_loss: 0.7794369177685844
          vf_explained_var: 0.6437304615974426
          vf_loss: 0.8965511618802945
    num_agent_steps_sampled: 445000
    num_agent_steps_trained: 445000
    num_steps_sampled: 445000
    num_steps_trained: 445000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,445,11874.7,445000,-34.796,-21.9,-59.2,347.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 446000
  custom_metrics: {}
  date: 2021-10-29_00-24-35
  done: false
  episode_len_mean: 341.06
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.106000000000215
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 5
  episodes_total: 1488
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.616606411014258e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6148251632849375
          entropy_coeff: 0.009999999999999998
          kl: 0.007963302186480645
          policy_loss: 0.0645764175388548
          total_loss: 1.4488022685050965
          vf_explained_var: 0.45646384358406067
          vf_loss: 1.3903741180896758
    num_agent_steps_sampled: 446000
    num_agent_steps_trained: 446000
    num_steps_sampled: 446000
    num_steps_trained: 446000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,446,11905.3,446000,-34.106,-21.9,-59.2,341.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 447000
  custom_metrics: {}
  date: 2021-10-29_00-24-57
  done: false
  episode_len_mean: 340.01
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.00100000000021
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 2
  episodes_total: 1490
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.616606411014258e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.5837339758872986
          entropy_coeff: 0.009999999999999998
          kl: 0.026541222285415087
          policy_loss: -0.177507886207766
          total_loss: 0.6399734935826725
          vf_explained_var: 0.7566853761672974
          vf_loss: 0.8333187283741104
    num_agent_steps_sampled: 447000
    num_agent_steps_trained: 447000
    num_steps_sampled: 447000
    num_steps_trained: 447000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,447,11927,447000,-34.001,-21.9,-59.2,340.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 448000
  custom_metrics: {}
  date: 2021-10-29_00-25-16
  done: false
  episode_len_mean: 340.89
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.08900000000021
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1493
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.424909616521386e-11
          cur_lr: 5.000000000000001e-05
          entropy: 2.2137541188134087
          entropy_coeff: 0.009999999999999998
          kl: 0.019179289576212868
          policy_loss: 0.09504142055908839
          total_loss: 0.5208749843968286
          vf_explained_var: 0.651673436164856
          vf_loss: 0.44797109858029416
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_steps_sampled: 448000
    num_steps_trained: 448000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,448,11946.1,448000,-34.089,-21.9,-59.2,340.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 449000
  custom_metrics: {}
  date: 2021-10-29_00-25-37
  done: false
  episode_len_mean: 339.72
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.972000000000214
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1496
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.424909616521386e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.7005656305286618
          entropy_coeff: 0.009999999999999998
          kl: 0.015706631094311314
          policy_loss: -0.012551765309439765
          total_loss: 0.6561861048142116
          vf_explained_var: 0.4092034697532654
          vf_loss: 0.6857435210297506
    num_agent_steps_sampled: 449000
    num_agent_steps_trained: 449000
    num_steps_sampled: 449000
    num_steps_trained: 449000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,449,11967.3,449000,-33.972,-21.9,-59.2,339.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 450000
  custom_metrics: {}
  date: 2021-10-29_00-25-58
  done: false
  episode_len_mean: 338.85
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.88500000000022
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 2
  episodes_total: 1498
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.424909616521386e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.6837307863765292
          entropy_coeff: 0.009999999999999998
          kl: 0.010872913157385176
          policy_loss: -0.15291042725245158
          total_loss: 0.32018784328053396
          vf_explained_var: 0.7023447751998901
          vf_loss: 0.4899355750117037
    num_agent_steps_sampled: 450000
    num_agent_steps_trained: 450000
    num_steps_sampled: 450000
    num_steps_trained: 450000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,450,11988.8,450000,-33.885,-21.9,-59.2,338.85




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 451000
  custom_metrics: {}
  date: 2021-10-29_00-26-34
  done: false
  episode_len_mean: 337.1
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.710000000000214
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1501
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.424909616521386e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.7316806740230983
          entropy_coeff: 0.009999999999999998
          kl: 0.015284821142667068
          policy_loss: 0.07221757570902507
          total_loss: 0.5292274355888367
          vf_explained_var: 0.2890625
          vf_loss: 0.47432666569948195
    num_agent_steps_sampled: 451000
    num_agent_steps_trained: 451000
    num_steps_sampled: 451000
    num_steps_trained: 451000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,451,12024.3,451000,-33.71,-21.9,-59.2,337.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 452000
  custom_metrics: {}
  date: 2021-10-29_00-26-55
  done: false
  episode_len_mean: 333.75
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.375000000000206
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 1504
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.424909616521386e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.6264346890979342
          entropy_coeff: 0.009999999999999998
          kl: 0.02349145393888392
          policy_loss: 0.02678489453262753
          total_loss: 1.0863607115215725
          vf_explained_var: 0.0168305616825819
          vf_loss: 1.0758401801188786
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_steps_sampled: 452000
    num_steps_trained: 452000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,452,12045,452000,-33.375,-21.9,-59.2,333.75


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 453000
  custom_metrics: {}
  date: 2021-10-29_00-27-15
  done: false
  episode_len_mean: 329.63
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -32.9630000000002
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 2
  episodes_total: 1506
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.63736442478208e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.7273899820115832
          entropy_coeff: 0.009999999999999998
          kl: 0.018896558470890945
          policy_loss: -0.09867251217365265
          total_loss: 0.54924803301692
          vf_explained_var: 0.2841256260871887
          vf_loss: 0.6651944319407145
    num_agent_steps_sampled: 453000
    num_agent_steps_trained: 453000
    num_steps_sampled: 453000
    num_steps_trained: 453000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,453,12065.5,453000,-32.963,-21.9,-59.2,329.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 454000
  custom_metrics: {}
  date: 2021-10-29_00-27-32
  done: false
  episode_len_mean: 329.93
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -32.9930000000002
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 2
  episodes_total: 1508
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.63736442478208e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.9723693794674344
          entropy_coeff: 0.009999999999999998
          kl: 0.013362286331267823
          policy_loss: -0.10737073222796122
          total_loss: 0.8419592870606316
          vf_explained_var: 0.4403269588947296
          vf_loss: 0.9690537207656437
    num_agent_steps_sampled: 454000
    num_agent_steps_trained: 454000
    num_steps_sampled: 454000
    num_steps_trained: 454000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,454,12082.6,454000,-32.993,-21.9,-55.1,329.93


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 455000
  custom_metrics: {}
  date: 2021-10-29_00-27-50
  done: false
  episode_len_mean: 329.77
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -32.9770000000002
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1511
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.63736442478208e-11
          cur_lr: 5.000000000000001e-05
          entropy: 2.248349976539612
          entropy_coeff: 0.009999999999999998
          kl: 0.011776864371044067
          policy_loss: 0.08958484621511566
          total_loss: 0.6674974570671718
          vf_explained_var: 0.4604039490222931
          vf_loss: 0.6003961230317751
    num_agent_steps_sampled: 455000
    num_agent_steps_trained: 455000
    num_steps_sampled: 455000
    num_steps_trained: 455000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,455,12099.8,455000,-32.977,-21.9,-55.1,329.77


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 456000
  custom_metrics: {}
  date: 2021-10-29_00-28-07
  done: false
  episode_len_mean: 330.32
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.032000000000195
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 2
  episodes_total: 1513
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.63736442478208e-11
          cur_lr: 5.000000000000001e-05
          entropy: 2.394567498895857
          entropy_coeff: 0.009999999999999998
          kl: 0.01390638059609723
          policy_loss: 0.061174784931871624
          total_loss: 0.672951504919264
          vf_explained_var: 0.034065235406160355
          vf_loss: 0.6357223879959848
    num_agent_steps_sampled: 456000
    num_agent_steps_trained: 456000
    num_steps_sampled: 456000
    num_steps_trained: 456000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,456,12117.6,456000,-33.032,-21.9,-55.1,330.32


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 457000
  custom_metrics: {}
  date: 2021-10-29_00-28-30
  done: false
  episode_len_mean: 330.31
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.0310000000002
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1516
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.63736442478208e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.5251935230361091
          entropy_coeff: 0.009999999999999998
          kl: 0.008488939655770947
          policy_loss: 0.01009238577551312
          total_loss: 0.9502550661563873
          vf_explained_var: 0.47088202834129333
          vf_loss: 0.9554146108527978
    num_agent_steps_sampled: 457000
    num_agent_steps_trained: 457000
    num_steps_sampled: 457000
    num_steps_trained: 457000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,457,12140.3,457000,-33.031,-21.9,-55.1,330.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 458000
  custom_metrics: {}
  date: 2021-10-29_00-28-49
  done: false
  episode_len_mean: 331.16
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.116000000000206
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 2
  episodes_total: 1518
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.63736442478208e-11
          cur_lr: 5.000000000000001e-05
          entropy: 2.20919668674469
          entropy_coeff: 0.009999999999999998
          kl: 0.02592023128485097
          policy_loss: -0.08902715328666899
          total_loss: 1.003359090619617
          vf_explained_var: 0.05880274251103401
          vf_loss: 1.1144782101942434
    num_agent_steps_sampled: 458000
    num_agent_steps_trained: 458000
    num_steps_sampled: 458000
    num_steps_trained: 458000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,458,12159.6,458000,-33.116,-21.9,-55.1,331.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 459000
  custom_metrics: {}
  date: 2021-10-29_00-29-10
  done: false
  episode_len_mean: 331.55
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.15500000000021
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1521
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.45604663717312e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.7843268301751878
          entropy_coeff: 0.009999999999999998
          kl: 0.01346911381156338
          policy_loss: -0.04762837257650163
          total_loss: 1.38306822180748
          vf_explained_var: 0.08909870684146881
          vf_loss: 1.4485398623678418
    num_agent_steps_sampled: 459000
    num_agent_steps_trained: 459000
    num_steps_sampled: 459000
    num_steps_trained: 459000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,459,12180.4,459000,-33.155,-21.9,-55.1,331.55


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 460000
  custom_metrics: {}
  date: 2021-10-29_00-29-31
  done: false
  episode_len_mean: 330.76
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.07600000000021
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1524
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.45604663717312e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.8791684521569145
          entropy_coeff: 0.009999999999999998
          kl: 0.01113284210709414
          policy_loss: 0.03514089427060551
          total_loss: 1.029000899526808
          vf_explained_var: 0.2252025455236435
          vf_loss: 1.0126516848802567
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_steps_sampled: 460000
    num_steps_trained: 460000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,460,12200.8,460000,-33.076,-21.9,-55.1,330.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 461000
  custom_metrics: {}
  date: 2021-10-29_00-29-50
  done: false
  episode_len_mean: 332.6
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.260000000000204
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 2
  episodes_total: 1526
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.45604663717312e-11
          cur_lr: 5.000000000000001e-05
          entropy: 2.092787848578559
          entropy_coeff: 0.009999999999999998
          kl: 0.019526325170737734
          policy_loss: -0.0961705884999699
          total_loss: 0.9587167604102029
          vf_explained_var: -0.0170681681483984
          vf_loss: 1.0758152219984267
    num_agent_steps_sampled: 461000
    num_agent_steps_trained: 461000
    num_steps_sampled: 461000
    num_steps_trained: 461000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,461,12220.2,461000,-33.26,-21.9,-55.1,332.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 462000
  custom_metrics: {}
  date: 2021-10-29_00-30-10
  done: false
  episode_len_mean: 333.28
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.32800000000021
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1529
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.45604663717312e-11
          cur_lr: 5.000000000000001e-05
          entropy: 2.027813234594133
          entropy_coeff: 0.009999999999999998
          kl: 0.018092079645248817
          policy_loss: 0.044175552825133
          total_loss: 0.9498909460173712
          vf_explained_var: 0.3955029249191284
          vf_loss: 0.9259935257335504
    num_agent_steps_sampled: 462000
    num_agent_steps_trained: 462000
    num_steps_sampled: 462000
    num_steps_trained: 462000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,462,12240.3,462000,-33.328,-21.9,-55.1,333.28




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 463000
  custom_metrics: {}
  date: 2021-10-29_00-30-49
  done: false
  episode_len_mean: 335.46
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.546000000000205
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1532
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.45604663717312e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.9052232993973626
          entropy_coeff: 0.009999999999999998
          kl: 0.017594280887833234
          policy_loss: 0.004055410913295216
          total_loss: 0.9758399052752389
          vf_explained_var: 0.03438333421945572
          vf_loss: 0.9908367270396815
    num_agent_steps_sampled: 463000
    num_agent_steps_trained: 463000
    num_steps_sampled: 463000
    num_steps_trained: 463000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,463,12278.9,463000,-33.546,-21.9,-55.1,335.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 464000
  custom_metrics: {}
  date: 2021-10-29_00-31-09
  done: false
  episode_len_mean: 336.79
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.67900000000021
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 2
  episodes_total: 1534
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.45604663717312e-11
          cur_lr: 5.000000000000001e-05
          entropy: 2.1747391833199394
          entropy_coeff: 0.009999999999999998
          kl: 0.01752193898410444
          policy_loss: -0.09764824542734359
          total_loss: 0.8338483807113436
          vf_explained_var: 0.3719738721847534
          vf_loss: 0.9532440196722746
    num_agent_steps_sampled: 464000
    num_agent_steps_trained: 464000
    num_steps_sampled: 464000
    num_steps_trained: 464000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,464,12299.5,464000,-33.679,-21.9,-55.1,336.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 465000
  custom_metrics: {}
  date: 2021-10-29_00-31-30
  done: false
  episode_len_mean: 338.86
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.886000000000216
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1537
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.45604663717312e-11
          cur_lr: 5.000000000000001e-05
          entropy: 2.020074869526757
          entropy_coeff: 0.009999999999999998
          kl: 0.012177832445636048
          policy_loss: 0.016426458292537267
          total_loss: 1.0065777358081607
          vf_explained_var: -0.009177186526358128
          vf_loss: 1.0103520138396158
    num_agent_steps_sampled: 465000
    num_agent_steps_trained: 465000
    num_steps_sampled: 465000
    num_steps_trained: 465000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,465,12320.4,465000,-33.886,-21.9,-55.1,338.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 466000
  custom_metrics: {}
  date: 2021-10-29_00-31-50
  done: false
  episode_len_mean: 340.92
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.09200000000022
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 2
  episodes_total: 1539
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.45604663717312e-11
          cur_lr: 5.000000000000001e-05
          entropy: 2.1213095161649917
          entropy_coeff: 0.009999999999999998
          kl: 0.014635219681027363
          policy_loss: -0.07034181861413849
          total_loss: 0.7609211257762379
          vf_explained_var: 0.30211591720581055
          vf_loss: 0.8524760547611449
    num_agent_steps_sampled: 466000
    num_agent_steps_trained: 466000
    num_steps_sampled: 466000
    num_steps_trained: 466000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,466,12339.6,466000,-34.092,-21.9,-55.1,340.92


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 467000
  custom_metrics: {}
  date: 2021-10-29_00-32-10
  done: false
  episode_len_mean: 343.28
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.32800000000022
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1542
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.45604663717312e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.7700730575455559
          entropy_coeff: 0.009999999999999998
          kl: 0.029181926250984504
          policy_loss: -0.11107714689440197
          total_loss: 1.1157102598084343
          vf_explained_var: 0.19212251901626587
          vf_loss: 1.2444881392849816
    num_agent_steps_sampled: 467000
    num_agent_steps_trained: 467000
    num_steps_sampled: 467000
    num_steps_trained: 467000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,467,12360.1,467000,-34.328,-21.9,-55.1,343.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 468000
  custom_metrics: {}
  date: 2021-10-29_00-32-32
  done: false
  episode_len_mean: 340.73
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.073000000000214
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1545
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.184069955759682e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.9396712938944498
          entropy_coeff: 0.009999999999999998
          kl: 0.015345610696201087
          policy_loss: -0.003249717586570316
          total_loss: 0.8696129040585624
          vf_explained_var: 0.2729058563709259
          vf_loss: 0.8922593292262819
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_steps_sampled: 468000
    num_steps_trained: 468000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,468,12382.2,468000,-34.073,-21.9,-55.1,340.73


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 469000
  custom_metrics: {}
  date: 2021-10-29_00-32-54
  done: false
  episode_len_mean: 339.66
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -33.96600000000021
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1548
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.184069955759682e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.789155375957489
          entropy_coeff: 0.009999999999999998
          kl: 0.041092278770443295
          policy_loss: -0.027661463452710047
          total_loss: 0.7981019775072734
          vf_explained_var: 0.37810295820236206
          vf_loss: 0.8436549948321448
    num_agent_steps_sampled: 469000
    num_agent_steps_trained: 469000
    num_steps_sampled: 469000
    num_steps_trained: 469000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,469,12403.6,469000,-33.966,-21.9,-55.1,339.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 470000
  custom_metrics: {}
  date: 2021-10-29_00-33-16
  done: false
  episode_len_mean: 340.78
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -34.0780000000002
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1551
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.227610493363952e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.6291122747792137
          entropy_coeff: 0.009999999999999998
          kl: 0.01582721336495554
          policy_loss: 0.014338726964261797
          total_loss: 0.9085866570472717
          vf_explained_var: 0.5530449748039246
          vf_loss: 0.9105390447709295
    num_agent_steps_sampled: 470000
    num_agent_steps_trained: 470000
    num_steps_sampled: 470000
    num_steps_trained: 470000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,470,12425.8,470000,-34.078,-21.9,-55.1,340.78


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 471000
  custom_metrics: {}
  date: 2021-10-29_00-33-36
  done: false
  episode_len_mean: 345.3
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -34.53000000000022
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1554
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.227610493363952e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.5582073993153043
          entropy_coeff: 0.009999999999999998
          kl: 0.01468941147444139
          policy_loss: 0.0808584835794237
          total_loss: 0.8454358551237319
          vf_explained_var: 0.23975521326065063
          vf_loss: 0.7801594413403008
    num_agent_steps_sampled: 471000
    num_agent_steps_trained: 471000
    num_steps_sampled: 471000
    num_steps_trained: 471000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,471,12445.7,471000,-34.53,-22.1,-55.1,345.3


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 472000
  custom_metrics: {}
  date: 2021-10-29_00-33-57
  done: false
  episode_len_mean: 347.78
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -34.778000000000226
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 2
  episodes_total: 1556
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.227610493363952e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2786984178755019
          entropy_coeff: 0.009999999999999998
          kl: 0.018877574967555037
          policy_loss: -0.1235454476541943
          total_loss: 1.2590287182066175
          vf_explained_var: -0.3325228691101074
          vf_loss: 1.395361140370369
    num_agent_steps_sampled: 472000
    num_agent_steps_trained: 472000
    num_steps_sampled: 472000
    num_steps_trained: 472000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,472,12467,472000,-34.778,-22.1,-55.1,347.78




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 473000
  custom_metrics: {}
  date: 2021-10-29_00-34-36
  done: false
  episode_len_mean: 349.83
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -34.983000000000224
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 4
  episodes_total: 1560
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.227610493363952e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2231827345159318
          entropy_coeff: 0.009999999999999998
          kl: 0.01975254804913222
          policy_loss: 0.02497411130203141
          total_loss: 1.496517256895701
          vf_explained_var: 0.302266001701355
          vf_loss: 1.4837749547428556
    num_agent_steps_sampled: 473000
    num_agent_steps_trained: 473000
    num_steps_sampled: 473000
    num_steps_trained: 473000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,473,12505.8,473000,-34.983,-22.1,-55.1,349.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 474000
  custom_metrics: {}
  date: 2021-10-29_00-35-03
  done: false
  episode_len_mean: 350.2
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -35.02000000000023
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1563
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.227610493363952e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.1728389534685346
          entropy_coeff: 0.009999999999999998
          kl: 0.01644025722881371
          policy_loss: 0.05390540642870797
          total_loss: 0.851681911945343
          vf_explained_var: 0.6102041602134705
          vf_loss: 0.8095049024042156
    num_agent_steps_sampled: 474000
    num_agent_steps_trained: 474000
    num_steps_sampled: 474000
    num_steps_trained: 474000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,474,12532.8,474000,-35.02,-22.4,-55.1,350.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 475000
  custom_metrics: {}
  date: 2021-10-29_00-35-28
  done: false
  episode_len_mean: 350.15
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -35.01500000000023
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 4
  episodes_total: 1567
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.227610493363952e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.176340521706475
          entropy_coeff: 0.009999999999999998
          kl: 0.008187691600727002
          policy_loss: -0.02676069802708096
          total_loss: 1.3260681920581394
          vf_explained_var: 0.42593705654144287
          vf_loss: 1.3645923031700982
    num_agent_steps_sampled: 475000
    num_agent_steps_trained: 475000
    num_steps_sampled: 475000
    num_steps_trained: 475000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,475,12558.1,475000,-35.015,-22.4,-55.1,350.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 476000
  custom_metrics: {}
  date: 2021-10-29_00-35-56
  done: false
  episode_len_mean: 349.41
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -34.94100000000022
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1570
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.227610493363952e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6303162041637632
          entropy_coeff: 0.009999999999999998
          kl: 0.03979436937734704
          policy_loss: -0.11706812745994992
          total_loss: 1.2637775964207119
          vf_explained_var: 0.43685048818588257
          vf_loss: 1.3871488942040338
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_steps_sampled: 476000
    num_steps_trained: 476000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,476,12585.9,476000,-34.941,-22,-55.1,349.41


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 477000
  custom_metrics: {}
  date: 2021-10-29_00-36-20
  done: false
  episode_len_mean: 348.18
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -34.81800000000023
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 4
  episodes_total: 1574
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8414157400459286e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9084688259495629
          entropy_coeff: 0.009999999999999998
          kl: 0.03115376427738753
          policy_loss: 0.028610776116450628
          total_loss: 0.8700503961907493
          vf_explained_var: 0.6914637684822083
          vf_loss: 0.8505243082841237
    num_agent_steps_sampled: 477000
    num_agent_steps_trained: 477000
    num_steps_sampled: 477000
    num_steps_trained: 477000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,477,12609.7,477000,-34.818,-22,-55.1,348.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 478000
  custom_metrics: {}
  date: 2021-10-29_00-36-44
  done: false
  episode_len_mean: 348.0
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -34.800000000000225
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1577
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7621236100688926e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.277294800678889
          entropy_coeff: 0.009999999999999998
          kl: 0.007988681705881125
          policy_loss: 0.0019101404481463961
          total_loss: 0.6295912404855092
          vf_explained_var: 0.7684987187385559
          vf_loss: 0.6404540436135397
    num_agent_steps_sampled: 478000
    num_agent_steps_trained: 478000
    num_steps_sampled: 478000
    num_steps_trained: 478000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,478,12634,478000,-34.8,-22,-55.1,348


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 479000
  custom_metrics: {}
  date: 2021-10-29_00-37-11
  done: false
  episode_len_mean: 345.41
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -34.541000000000224
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 4
  episodes_total: 1581
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7621236100688926e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.1538716912269593
          entropy_coeff: 0.009999999999999998
          kl: 0.01244845161246428
          policy_loss: 0.014373716215292614
          total_loss: 0.7146059426996443
          vf_explained_var: 0.6654508113861084
          vf_loss: 0.7117709414826499
    num_agent_steps_sampled: 479000
    num_agent_steps_trained: 479000
    num_steps_sampled: 479000
    num_steps_trained: 479000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,479,12661,479000,-34.541,-22,-55.1,345.41


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 480000
  custom_metrics: {}
  date: 2021-10-29_00-37-38
  done: false
  episode_len_mean: 345.69
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -34.56900000000022
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 4
  episodes_total: 1585
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7621236100688926e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8503118720319536
          entropy_coeff: 0.009999999999999998
          kl: 0.014831148244531928
          policy_loss: -0.022915952073203193
          total_loss: 1.4273234109083812
          vf_explained_var: 0.3707587420940399
          vf_loss: 1.458742477496465
    num_agent_steps_sampled: 480000
    num_agent_steps_trained: 480000
    num_steps_sampled: 480000
    num_steps_trained: 480000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,480,12687.8,480000,-34.569,-22,-55.1,345.69


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 481000
  custom_metrics: {}
  date: 2021-10-29_00-38-05
  done: false
  episode_len_mean: 345.51
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -34.55100000000022
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 4
  episodes_total: 1589
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7621236100688926e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0327229665385351
          entropy_coeff: 0.009999999999999998
          kl: 0.023225821231323746
          policy_loss: 0.0112963129248884
          total_loss: 1.2328925430774689
          vf_explained_var: 0.22864361107349396
          vf_loss: 1.2319234596358406
    num_agent_steps_sampled: 481000
    num_agent_steps_trained: 481000
    num_steps_sampled: 481000
    num_steps_trained: 481000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,481,12715.1,481000,-34.551,-22,-55.1,345.51




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 482000
  custom_metrics: {}
  date: 2021-10-29_00-38-49
  done: false
  episode_len_mean: 343.86
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -34.386000000000216
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 3
  episodes_total: 1592
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.1431854151033386e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2005756576855977
          entropy_coeff: 0.009999999999999998
          kl: 0.037273415647622064
          policy_loss: 0.0640725363459852
          total_loss: 0.8687975002659691
          vf_explained_var: 0.3898496925830841
          vf_loss: 0.8167307038687998
    num_agent_steps_sampled: 482000
    num_agent_steps_trained: 482000
    num_steps_sampled: 482000
    num_steps_trained: 482000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,482,12758.4,482000,-34.386,-22,-55.1,343.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 483000
  custom_metrics: {}
  date: 2021-10-29_00-39-16
  done: false
  episode_len_mean: 339.96
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -33.996000000000215
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 4
  episodes_total: 1596
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.214778122655007e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.330612842241923
          entropy_coeff: 0.009999999999999998
          kl: 0.005687145717742068
          policy_loss: -0.050008874386548996
          total_loss: 1.307860697640313
          vf_explained_var: 0.24969463050365448
          vf_loss: 1.3711757057242924
    num_agent_steps_sampled: 483000
    num_agent_steps_trained: 483000
    num_steps_sampled: 483000
    num_steps_trained: 483000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,483,12785.5,483000,-33.996,-22,-55.1,339.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 484000
  custom_metrics: {}
  date: 2021-10-29_00-39-45
  done: false
  episode_len_mean: 335.65
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -33.56500000000021
  episode_reward_min: -55.10000000000051
  episodes_this_iter: 4
  episodes_total: 1600
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.214778122655007e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0149289912647672
          entropy_coeff: 0.009999999999999998
          kl: 0.011016779866834428
          policy_loss: 0.004694423741764492
          total_loss: 1.2457642078399658
          vf_explained_var: 0.0605660080909729
          vf_loss: 1.251219055387709
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_steps_sampled: 484000
    num_steps_trained: 484000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,484,12814.6,484000,-33.565,-22,-55.1,335.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 485000
  custom_metrics: {}
  date: 2021-10-29_00-40-13
  done: false
  episode_len_mean: 329.54
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -32.95400000000019
  episode_reward_min: -48.80000000000042
  episodes_this_iter: 4
  episodes_total: 1604
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.214778122655007e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9717013902134366
          entropy_coeff: 0.009999999999999998
          kl: 0.007294759902271785
          policy_loss: 0.05363541800114843
          total_loss: 1.1725887437661489
          vf_explained_var: 0.12822014093399048
          vf_loss: 1.1286703421009912
    num_agent_steps_sampled: 485000
    num_agent_steps_trained: 485000
    num_steps_sampled: 485000
    num_steps_trained: 485000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,485,12842.8,485000,-32.954,-22,-48.8,329.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 486000
  custom_metrics: {}
  date: 2021-10-29_00-40-42
  done: false
  episode_len_mean: 323.07
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -32.30700000000019
  episode_reward_min: -48.10000000000041
  episodes_this_iter: 4
  episodes_total: 1608
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.214778122655007e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0046497914526198
          entropy_coeff: 0.009999999999999998
          kl: 0.016293576605829686
          policy_loss: 0.006628539578782187
          total_loss: 1.2467855996555752
          vf_explained_var: 0.1581341177225113
          vf_loss: 1.2502035525110033
    num_agent_steps_sampled: 486000
    num_agent_steps_trained: 486000
    num_steps_sampled: 486000
    num_steps_trained: 486000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,486,12871.8,486000,-32.307,-22,-48.1,323.07


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 487000
  custom_metrics: {}
  date: 2021-10-29_00-41-10
  done: false
  episode_len_mean: 315.38
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -31.538000000000178
  episode_reward_min: -45.80000000000038
  episodes_this_iter: 4
  episodes_total: 1612
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.214778122655007e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9673698127269745
          entropy_coeff: 0.009999999999999998
          kl: 0.020272554145896352
          policy_loss: -0.0027740469409359824
          total_loss: 1.333293573061625
          vf_explained_var: 0.11142604798078537
          vf_loss: 1.3457413183318243
    num_agent_steps_sampled: 487000
    num_agent_steps_trained: 487000
    num_steps_sampled: 487000
    num_steps_trained: 487000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,487,12899.8,487000,-31.538,-22,-45.8,315.38


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 488000
  custom_metrics: {}
  date: 2021-10-29_00-41-40
  done: false
  episode_len_mean: 310.76
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -31.07600000000017
  episode_reward_min: -45.60000000000038
  episodes_this_iter: 4
  episodes_total: 1616
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.322167183982512e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8716949489381578
          entropy_coeff: 0.009999999999999998
          kl: 0.016783988005461638
          policy_loss: 0.016855509703358015
          total_loss: 1.3751829942067464
          vf_explained_var: 0.08561135083436966
          vf_loss: 1.367044410440657
    num_agent_steps_sampled: 488000
    num_agent_steps_trained: 488000
    num_steps_sampled: 488000
    num_steps_trained: 488000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,488,12929.4,488000,-31.076,-22,-45.6,310.76




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 489000
  custom_metrics: {}
  date: 2021-10-29_00-42-25
  done: false
  episode_len_mean: 304.03
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -30.40300000000017
  episode_reward_min: -45.60000000000038
  episodes_this_iter: 4
  episodes_total: 1620
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.322167183982512e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9640894711017609
          entropy_coeff: 0.009999999999999998
          kl: 0.025555130022275958
          policy_loss: -0.048958883268965615
          total_loss: 1.3366701854599847
          vf_explained_var: 0.08651668578386307
          vf_loss: 1.3952699780464173
    num_agent_steps_sampled: 489000
    num_agent_steps_trained: 489000
    num_steps_sampled: 489000
    num_steps_trained: 489000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,489,12974.9,489000,-30.403,-22,-45.6,304.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 490000
  custom_metrics: {}
  date: 2021-10-29_00-42-54
  done: false
  episode_len_mean: 299.87
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -29.98700000000015
  episode_reward_min: -45.60000000000038
  episodes_this_iter: 4
  episodes_total: 1624
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3983250775973766e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0096807526217566
          entropy_coeff: 0.009999999999999998
          kl: 0.038011315006862625
          policy_loss: -0.01479302400516139
          total_loss: 1.2699180987146166
          vf_explained_var: 0.3392517864704132
          vf_loss: 1.2948079228401184
    num_agent_steps_sampled: 490000
    num_agent_steps_trained: 490000
    num_steps_sampled: 490000
    num_steps_trained: 490000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,490,13003.9,490000,-29.987,-22,-45.6,299.87


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 491000
  custom_metrics: {}
  date: 2021-10-29_00-43-24
  done: false
  episode_len_mean: 293.61
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -29.36100000000015
  episode_reward_min: -45.60000000000038
  episodes_this_iter: 4
  episodes_total: 1628
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0974876163960648e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8454864071475134
          entropy_coeff: 0.009999999999999998
          kl: 0.014298168608757243
          policy_loss: -0.03928386916716894
          total_loss: 1.2560536596510146
          vf_explained_var: 0.3168511688709259
          vf_loss: 1.3037923912207285
    num_agent_steps_sampled: 491000
    num_agent_steps_trained: 491000
    num_steps_sampled: 491000
    num_steps_trained: 491000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,491,13033.6,491000,-29.361,-22,-45.6,293.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 492000
  custom_metrics: {}
  date: 2021-10-29_00-43-54
  done: false
  episode_len_mean: 286.46
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -28.646000000000132
  episode_reward_min: -40.90000000000031
  episodes_this_iter: 5
  episodes_total: 1633
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0974876163960648e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8731203383869595
          entropy_coeff: 0.009999999999999998
          kl: 0.03129176073487388
          policy_loss: -0.00876670049296485
          total_loss: 1.6335250828001233
          vf_explained_var: 0.2973453998565674
          vf_loss: 1.6510229835907617
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_steps_sampled: 492000
    num_steps_trained: 492000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,492,13063.2,492000,-28.646,-22,-40.9,286.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 493000
  custom_metrics: {}
  date: 2021-10-29_00-44-24
  done: false
  episode_len_mean: 281.32
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -28.132000000000126
  episode_reward_min: -40.90000000000031
  episodes_this_iter: 4
  episodes_total: 1637
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1462314245940973e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8230483750502269
          entropy_coeff: 0.009999999999999998
          kl: 0.017027546452837092
          policy_loss: 0.0700599867436621
          total_loss: 1.0425990985499487
          vf_explained_var: 0.36872655153274536
          vf_loss: 0.9807696143786112
    num_agent_steps_sampled: 493000
    num_agent_steps_trained: 493000
    num_steps_sampled: 493000
    num_steps_trained: 493000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,493,13093.1,493000,-28.132,-22,-40.9,281.32


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 494000
  custom_metrics: {}
  date: 2021-10-29_00-44-55
  done: false
  episode_len_mean: 275.1
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -27.510000000000122
  episode_reward_min: -40.90000000000031
  episodes_this_iter: 4
  episodes_total: 1641
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1462314245940973e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8434086806244321
          entropy_coeff: 0.009999999999999998
          kl: 0.004736286898715945
          policy_loss: 0.04555166678296195
          total_loss: 1.067438589864307
          vf_explained_var: 0.10928178578615189
          vf_loss: 1.0303209986951616
    num_agent_steps_sampled: 494000
    num_agent_steps_trained: 494000
    num_steps_sampled: 494000
    num_steps_trained: 494000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,494,13124.2,494000,-27.51,-22,-40.9,275.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 495000
  custom_metrics: {}
  date: 2021-10-29_00-45-23
  done: false
  episode_len_mean: 270.2
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -27.020000000000113
  episode_reward_min: -40.90000000000031
  episodes_this_iter: 4
  episodes_total: 1645
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5731157122970487e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0404595964484744
          entropy_coeff: 0.009999999999999998
          kl: 0.016705706680540625
          policy_loss: -0.08689155214362675
          total_loss: 1.1836616423394946
          vf_explained_var: 0.2971433103084564
          vf_loss: 1.2809577902158102
    num_agent_steps_sampled: 495000
    num_agent_steps_trained: 495000
    num_steps_sampled: 495000
    num_steps_trained: 495000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,495,13152.8,495000,-27.02,-22,-40.9,270.2




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 496000
  custom_metrics: {}
  date: 2021-10-29_00-46-11
  done: false
  episode_len_mean: 264.35
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.4350000000001
  episode_reward_min: -40.500000000000306
  episodes_this_iter: 5
  episodes_total: 1650
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5731157122970487e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9546019083923764
          entropy_coeff: 0.009999999999999998
          kl: 0.02277178632721599
          policy_loss: -0.024053883593943386
          total_loss: 1.2863272103998395
          vf_explained_var: 0.29100215435028076
          vf_loss: 1.3199271149105496
    num_agent_steps_sampled: 496000
    num_agent_steps_trained: 496000
    num_steps_sampled: 496000
    num_steps_trained: 496000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,496,13200.1,496000,-26.435,-20.7,-40.5,264.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 497000
  custom_metrics: {}
  date: 2021-10-29_00-46-42
  done: false
  episode_len_mean: 259.9
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -25.9900000000001
  episode_reward_min: -39.30000000000029
  episodes_this_iter: 4
  episodes_total: 1654
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.359673568445573e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9600717902183533
          entropy_coeff: 0.009999999999999998
          kl: 0.010268315107559387
          policy_loss: 0.02401315685775545
          total_loss: 0.7904483622974819
          vf_explained_var: 0.2230096459388733
          vf_loss: 0.7760359260771009
    num_agent_steps_sampled: 497000
    num_agent_steps_trained: 497000
    num_steps_sampled: 497000
    num_steps_trained: 497000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,497,13231.2,497000,-25.99,-20.7,-39.3,259.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 498000
  custom_metrics: {}
  date: 2021-10-29_00-47-12
  done: false
  episode_len_mean: 255.98
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -25.5980000000001
  episode_reward_min: -35.600000000000236
  episodes_this_iter: 4
  episodes_total: 1658
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.359673568445573e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.984906307193968
          entropy_coeff: 0.009999999999999998
          kl: 0.007620136368761463
          policy_loss: 0.005194590902990765
          total_loss: 1.1104097982247672
          vf_explained_var: 0.14584869146347046
          vf_loss: 1.1150642851988475
    num_agent_steps_sampled: 498000
    num_agent_steps_trained: 498000
    num_steps_sampled: 498000
    num_steps_trained: 498000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,498,13261.2,498000,-25.598,-20.7,-35.6,255.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 499000
  custom_metrics: {}
  date: 2021-10-29_00-47-42
  done: false
  episode_len_mean: 252.64
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -25.264000000000088
  episode_reward_min: -35.600000000000236
  episodes_this_iter: 4
  episodes_total: 1662
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.359673568445573e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9036643279923333
          entropy_coeff: 0.009999999999999998
          kl: 0.007197675496548249
          policy_loss: 0.010604311898350715
          total_loss: 0.9792370160420736
          vf_explained_var: 0.2639945149421692
          vf_loss: 0.977669362227122
    num_agent_steps_sampled: 499000
    num_agent_steps_trained: 499000
    num_steps_sampled: 499000
    num_steps_trained: 499000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,499,13291,499000,-25.264,-20.7,-35.6,252.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 500000
  custom_metrics: {}
  date: 2021-10-29_00-48-11
  done: false
  episode_len_mean: 251.95
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -25.195000000000086
  episode_reward_min: -35.600000000000236
  episodes_this_iter: 4
  episodes_total: 1666
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.359673568445573e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9322888851165771
          entropy_coeff: 0.009999999999999998
          kl: 0.012107860532842728
          policy_loss: 0.028441839582390254
          total_loss: 0.8860146615240309
          vf_explained_var: 0.2551408112049103
          vf_loss: 0.8668957021501329
    num_agent_steps_sampled: 500000
    num_agent_steps_trained: 500000
    num_steps_sampled: 500000
    num_steps_trained: 500000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,500,13320.4,500000,-25.195,-20.7,-35.6,251.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 501000
  custom_metrics: {}
  date: 2021-10-29_00-48-42
  done: false
  episode_len_mean: 250.87
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -25.08700000000009
  episode_reward_min: -35.600000000000236
  episodes_this_iter: 4
  episodes_total: 1670
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.359673568445573e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6022742840978834
          entropy_coeff: 0.009999999999999998
          kl: 0.005006891537347218
          policy_loss: -0.06582360987861952
          total_loss: 0.9067033853795793
          vf_explained_var: 0.18203803896903992
          vf_loss: 0.9785497354136573
    num_agent_steps_sampled: 501000
    num_agent_steps_trained: 501000
    num_steps_sampled: 501000
    num_steps_trained: 501000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,501,13350.8,501000,-25.087,-20.7,-35.6,250.87


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 502000
  custom_metrics: {}
  date: 2021-10-29_00-49-11
  done: false
  episode_len_mean: 248.66
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -24.86600000000008
  episode_reward_min: -35.600000000000236
  episodes_this_iter: 4
  episodes_total: 1674
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.359673568445573e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7311130768722958
          entropy_coeff: 0.009999999999999998
          kl: 0.006495172892851681
          policy_loss: -0.11363980323076248
          total_loss: 1.1489414592583975
          vf_explained_var: 0.21549566090106964
          vf_loss: 1.2698923958672417
    num_agent_steps_sampled: 502000
    num_agent_steps_trained: 502000
    num_steps_sampled: 502000
    num_steps_trained: 502000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,502,13380.1,502000,-24.866,-20.7,-35.6,248.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 503000
  custom_metrics: {}
  date: 2021-10-29_00-49-40
  done: false
  episode_len_mean: 246.0
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -24.600000000000076
  episode_reward_min: -33.70000000000021
  episodes_this_iter: 5
  episodes_total: 1679
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.359673568445573e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5829127271970113
          entropy_coeff: 0.009999999999999998
          kl: 0.010298153849254197
          policy_loss: -0.018619734131627614
          total_loss: 1.200484373834398
          vf_explained_var: 0.3110610246658325
          vf_loss: 1.2249332328637441
    num_agent_steps_sampled: 503000
    num_agent_steps_trained: 503000
    num_steps_sampled: 503000
    num_steps_trained: 503000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,503,13409.3,503000,-24.6,-20.7,-33.7,246




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 504000
  custom_metrics: {}
  date: 2021-10-29_00-50-27
  done: false
  episode_len_mean: 245.21
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.521000000000075
  episode_reward_min: -33.70000000000021
  episodes_this_iter: 4
  episodes_total: 1683
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.359673568445573e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7048182662990358
          entropy_coeff: 0.009999999999999998
          kl: 0.024711424890071023
          policy_loss: 0.029412972182035445
          total_loss: 1.0285553746753269
          vf_explained_var: 0.3496825397014618
          vf_loss: 1.0061905794673496
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_steps_sampled: 504000
    num_steps_trained: 504000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,504,13456.6,504000,-24.521,-20.1,-33.7,245.21


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 505000
  custom_metrics: {}
  date: 2021-10-29_00-50-56
  done: false
  episode_len_mean: 244.69
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.469000000000083
  episode_reward_min: -33.70000000000021
  episodes_this_iter: 4
  episodes_total: 1687
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.539510352668361e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.654300339685546
          entropy_coeff: 0.009999999999999998
          kl: 0.01919696943333141
          policy_loss: 0.03264748549295796
          total_loss: 1.0390665325853559
          vf_explained_var: 0.3198436498641968
          vf_loss: 1.0129620565308466
    num_agent_steps_sampled: 505000
    num_agent_steps_trained: 505000
    num_steps_sampled: 505000
    num_steps_trained: 505000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,505,13484.8,505000,-24.469,-20.1,-33.7,244.69


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 506000
  custom_metrics: {}
  date: 2021-10-29_00-51-24
  done: false
  episode_len_mean: 243.83
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.383000000000074
  episode_reward_min: -33.70000000000021
  episodes_this_iter: 4
  episodes_total: 1691
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.539510352668361e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7542085111141205
          entropy_coeff: 0.009999999999999998
          kl: 0.009136827536833063
          policy_loss: 0.018619557552867465
          total_loss: 1.2088337792290582
          vf_explained_var: 0.27551931142807007
          vf_loss: 1.1977563089794583
    num_agent_steps_sampled: 506000
    num_agent_steps_trained: 506000
    num_steps_sampled: 506000
    num_steps_trained: 506000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,506,13513.1,506000,-24.383,-20.1,-33.7,243.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 507000
  custom_metrics: {}
  date: 2021-10-29_00-51-53
  done: false
  episode_len_mean: 241.77
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.177000000000067
  episode_reward_min: -28.000000000000128
  episodes_this_iter: 4
  episodes_total: 1695
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.539510352668361e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6901201771365272
          entropy_coeff: 0.009999999999999998
          kl: 0.006653021712341574
          policy_loss: 0.006213270003596941
          total_loss: 1.2193456186188592
          vf_explained_var: 0.24728241562843323
          vf_loss: 1.2200335601965586
    num_agent_steps_sampled: 507000
    num_agent_steps_trained: 507000
    num_steps_sampled: 507000
    num_steps_trained: 507000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,507,13542.1,507000,-24.177,-20.1,-28,241.77


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 508000
  custom_metrics: {}
  date: 2021-10-29_00-52-20
  done: false
  episode_len_mean: 242.44
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.244000000000074
  episode_reward_min: -33.2000000000002
  episodes_this_iter: 4
  episodes_total: 1699
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.539510352668361e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8490354776382446
          entropy_coeff: 0.009999999999999998
          kl: 0.011757734609287937
          policy_loss: 0.01725860751337475
          total_loss: 1.3630813519159952
          vf_explained_var: 0.33615174889564514
          vf_loss: 1.354313117927975
    num_agent_steps_sampled: 508000
    num_agent_steps_trained: 508000
    num_steps_sampled: 508000
    num_steps_trained: 508000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,508,13569.2,508000,-24.244,-20.1,-33.2,242.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 509000
  custom_metrics: {}
  date: 2021-10-29_00-52-49
  done: false
  episode_len_mean: 242.53
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.25300000000007
  episode_reward_min: -33.2000000000002
  episodes_this_iter: 4
  episodes_total: 1703
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.539510352668361e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6613587773508496
          entropy_coeff: 0.009999999999999998
          kl: 0.012579460381789037
          policy_loss: 0.03458349704742432
          total_loss: 0.9271622439225514
          vf_explained_var: 0.30722707509994507
          vf_loss: 0.8991923368639416
    num_agent_steps_sampled: 509000
    num_agent_steps_trained: 509000
    num_steps_sampled: 509000
    num_steps_trained: 509000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,509,13598.5,509000,-24.253,-20.1,-33.2,242.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 510000
  custom_metrics: {}
  date: 2021-10-29_00-53-16
  done: false
  episode_len_mean: 243.19
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.319000000000074
  episode_reward_min: -33.2000000000002
  episodes_this_iter: 3
  episodes_total: 1706
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.539510352668361e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9706651747226716
          entropy_coeff: 0.009999999999999998
          kl: 0.045982589968884
          policy_loss: -0.06702849268913269
          total_loss: 1.517229852411482
          vf_explained_var: 0.2028679996728897
          vf_loss: 1.5939649979273478
    num_agent_steps_sampled: 510000
    num_agent_steps_trained: 510000
    num_steps_sampled: 510000
    num_steps_trained: 510000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,510,13625.1,510000,-24.319,-20.1,-33.2,243.19




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 511000
  custom_metrics: {}
  date: 2021-10-29_00-54-03
  done: false
  episode_len_mean: 243.41
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.341000000000076
  episode_reward_min: -33.2000000000002
  episodes_this_iter: 5
  episodes_total: 1711
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.309265529002541e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8810997770892249
          entropy_coeff: 0.009999999999999998
          kl: 0.02543250386628652
          policy_loss: -0.011499588439861933
          total_loss: 1.5816508889198304
          vf_explained_var: 0.24723678827285767
          vf_loss: 1.6019614855448405
    num_agent_steps_sampled: 511000
    num_agent_steps_trained: 511000
    num_steps_sampled: 511000
    num_steps_trained: 511000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,511,13671.8,511000,-24.341,-20.1,-33.2,243.41


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 512000
  custom_metrics: {}
  date: 2021-10-29_00-54-27
  done: false
  episode_len_mean: 244.42
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.44200000000008
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 3
  episodes_total: 1714
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.963898293503807e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8139766964647505
          entropy_coeff: 0.009999999999999998
          kl: 0.015378807236480323
          policy_loss: 0.021729858385192022
          total_loss: 0.8928690135478974
          vf_explained_var: -0.08228372782468796
          vf_loss: 0.8792789227432675
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_steps_sampled: 512000
    num_steps_trained: 512000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,512,13695.7,512000,-24.442,-20.1,-34.7,244.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 513000
  custom_metrics: {}
  date: 2021-10-29_00-54-56
  done: false
  episode_len_mean: 245.03
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.50300000000008
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1718
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.963898293503807e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.737704321079784
          entropy_coeff: 0.009999999999999998
          kl: 0.020029452839654658
          policy_loss: 0.025773310164610545
          total_loss: 1.1553830219639671
          vf_explained_var: 0.40321028232574463
          vf_loss: 1.1369867556624942
    num_agent_steps_sampled: 513000
    num_agent_steps_trained: 513000
    num_steps_sampled: 513000
    num_steps_trained: 513000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,513,13725.2,513000,-24.503,-20.1,-34.7,245.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 514000
  custom_metrics: {}
  date: 2021-10-29_00-55-23
  done: false
  episode_len_mean: 245.49
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.549000000000078
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1722
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1945847440255717e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9136513584189945
          entropy_coeff: 0.009999999999999998
          kl: 0.010761000697994029
          policy_loss: -0.028865999397304325
          total_loss: 1.1563351293404898
          vf_explained_var: 0.386281281709671
          vf_loss: 1.1943376421928407
    num_agent_steps_sampled: 514000
    num_agent_steps_trained: 514000
    num_steps_sampled: 514000
    num_steps_trained: 514000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,514,13751.7,514000,-24.549,-20.1,-34.7,245.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 515000
  custom_metrics: {}
  date: 2021-10-29_00-55-52
  done: false
  episode_len_mean: 246.02
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.60200000000008
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1726
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1945847440255717e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7368456919987997
          entropy_coeff: 0.009999999999999998
          kl: 0.01169510989686589
          policy_loss: 0.02943258840176794
          total_loss: 1.0048146367073059
          vf_explained_var: 0.49680769443511963
          vf_loss: 0.9827505005730524
    num_agent_steps_sampled: 515000
    num_agent_steps_trained: 515000
    num_steps_sampled: 515000
    num_steps_trained: 515000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,515,13780.4,515000,-24.602,-20.1,-34.7,246.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 516000
  custom_metrics: {}
  date: 2021-10-29_00-56-18
  done: false
  episode_len_mean: 247.77
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.77700000000008
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 3
  episodes_total: 1729
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1945847440255717e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8823658651775784
          entropy_coeff: 0.009999999999999998
          kl: 0.0194869270645703
          policy_loss: 0.017704535358481936
          total_loss: 0.9717774622970157
          vf_explained_var: 0.28991225361824036
          vf_loss: 0.9628965806629922
    num_agent_steps_sampled: 516000
    num_agent_steps_trained: 516000
    num_steps_sampled: 516000
    num_steps_trained: 516000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,516,13806.7,516000,-24.777,-20.1,-34.7,247.77


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 517000
  custom_metrics: {}
  date: 2021-10-29_00-56-46
  done: false
  episode_len_mean: 248.3
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.830000000000087
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1733
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1945847440255717e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7423294100496504
          entropy_coeff: 0.009999999999999998
          kl: 0.007384485781283937
          policy_loss: -0.02612836617562506
          total_loss: 1.1745796097649468
          vf_explained_var: 0.2445322424173355
          vf_loss: 1.2081312682893541
    num_agent_steps_sampled: 517000
    num_agent_steps_trained: 517000
    num_steps_sampled: 517000
    num_steps_trained: 517000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,517,13835,517000,-24.83,-20.1,-34.7,248.3


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 518000
  custom_metrics: {}
  date: 2021-10-29_00-57-15
  done: false
  episode_len_mean: 248.78
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.878000000000085
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1737
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1945847440255717e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.801133375035392
          entropy_coeff: 0.009999999999999998
          kl: 0.009217806460459757
          policy_loss: -0.012517116549942229
          total_loss: 1.2196771330303615
          vf_explained_var: 0.27544018626213074
          vf_loss: 1.2402055780092875
    num_agent_steps_sampled: 518000
    num_agent_steps_trained: 518000
    num_steps_sampled: 518000
    num_steps_trained: 518000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,518,13863.9,518000,-24.878,-20.1,-34.7,248.78




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 519000
  custom_metrics: {}
  date: 2021-10-29_00-58-03
  done: false
  episode_len_mean: 249.08
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.908000000000087
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1741
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1945847440255717e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6670616053872638
          entropy_coeff: 0.009999999999999998
          kl: 0.017605008356956987
          policy_loss: -0.06719321000079313
          total_loss: 0.9286452472209931
          vf_explained_var: 0.5016002655029297
          vf_loss: 1.0025090674559276
    num_agent_steps_sampled: 519000
    num_agent_steps_trained: 519000
    num_steps_sampled: 519000
    num_steps_trained: 519000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,519,13911.8,519000,-24.908,-20.1,-34.7,249.08


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 520000
  custom_metrics: {}
  date: 2021-10-29_00-58-28
  done: false
  episode_len_mean: 251.06
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -25.10600000000008
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1745
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1945847440255717e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0245648350980547
          entropy_coeff: 0.009999999999999998
          kl: 0.012424931280425888
          policy_loss: -0.03060320590933164
          total_loss: 1.4682229598363241
          vf_explained_var: 0.1801445186138153
          vf_loss: 1.5090718044175042
    num_agent_steps_sampled: 520000
    num_agent_steps_trained: 520000
    num_steps_sampled: 520000
    num_steps_trained: 520000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,520,13936.9,520000,-25.106,-20.1,-34.7,251.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 521000
  custom_metrics: {}
  date: 2021-10-29_00-58-55
  done: false
  episode_len_mean: 251.42
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -25.142000000000085
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1749
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1945847440255717e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6875961204369863
          entropy_coeff: 0.009999999999999998
          kl: 0.006478719090688657
          policy_loss: -0.0203517895605829
          total_loss: 0.9814123385482364
          vf_explained_var: 0.36765992641448975
          vf_loss: 1.008640080690384
    num_agent_steps_sampled: 521000
    num_agent_steps_trained: 521000
    num_steps_sampled: 521000
    num_steps_trained: 521000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,521,13964.3,521000,-25.142,-20.1,-34.7,251.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 522000
  custom_metrics: {}
  date: 2021-10-29_00-59-22
  done: false
  episode_len_mean: 252.8
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -25.28000000000009
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1753
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1945847440255717e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6009145895640056
          entropy_coeff: 0.009999999999999998
          kl: 0.018833340146314253
          policy_loss: 0.005114774571524726
          total_loss: 1.0783835232257843
          vf_explained_var: 0.24819566309452057
          vf_loss: 1.0792778816488053
    num_agent_steps_sampled: 522000
    num_agent_steps_trained: 522000
    num_steps_sampled: 522000
    num_steps_trained: 522000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,522,13991,522000,-25.28,-20.1,-34.7,252.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 523000
  custom_metrics: {}
  date: 2021-10-29_00-59-51
  done: false
  episode_len_mean: 253.13
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -25.31300000000009
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1757
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1945847440255717e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5428052792946497
          entropy_coeff: 0.009999999999999998
          kl: 0.005194731078028762
          policy_loss: 0.028750474750995635
          total_loss: 1.0415564139684041
          vf_explained_var: 0.3300381898880005
          vf_loss: 1.0182339813974168
    num_agent_steps_sampled: 523000
    num_agent_steps_trained: 523000
    num_steps_sampled: 523000
    num_steps_trained: 523000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,523,14020.1,523000,-25.313,-20.1,-34.7,253.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 524000
  custom_metrics: {}
  date: 2021-10-29_01-00-17
  done: false
  episode_len_mean: 253.93
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -25.393000000000082
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 3
  episodes_total: 1760
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1945847440255717e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6882249375184377
          entropy_coeff: 0.009999999999999998
          kl: 0.02128449803928629
          policy_loss: 0.010698262436522377
          total_loss: 1.004610577887959
          vf_explained_var: -0.17571154236793518
          vf_loss: 1.0007945443193118
    num_agent_steps_sampled: 524000
    num_agent_steps_trained: 524000
    num_steps_sampled: 524000
    num_steps_trained: 524000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,524,14046,524000,-25.393,-20.1,-34.7,253.93


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 525000
  custom_metrics: {}
  date: 2021-10-29_01-00-46
  done: false
  episode_len_mean: 255.02
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -25.502000000000095
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1764
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7918771160383577e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5612641586197747
          entropy_coeff: 0.009999999999999998
          kl: 0.01455627344910025
          policy_loss: 0.0164611231121752
          total_loss: 1.311143206225501
          vf_explained_var: 0.167066290974617
          vf_loss: 1.3002947317229376
    num_agent_steps_sampled: 525000
    num_agent_steps_trained: 525000
    num_steps_sampled: 525000
    num_steps_trained: 525000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,525,14074.4,525000,-25.502,-20.1,-34.7,255.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 526000
  custom_metrics: {}
  date: 2021-10-29_01-01-13
  done: false
  episode_len_mean: 256.16
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -25.61600000000009
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1768
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7918771160383577e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7022363821665446
          entropy_coeff: 0.009999999999999998
          kl: 0.031886469774833004
          policy_loss: 0.0577340944773621
          total_loss: 1.4274457030826144
          vf_explained_var: 0.09098231792449951
          vf_loss: 1.3767339878612095
    num_agent_steps_sampled: 526000
    num_agent_steps_trained: 526000
    num_steps_sampled: 526000
    num_steps_trained: 526000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,526,14101.5,526000,-25.616,-20.1,-34.7,256.16




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 527000
  custom_metrics: {}
  date: 2021-10-29_01-02-00
  done: false
  episode_len_mean: 256.48
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -25.648000000000096
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1772
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.687815674057536e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7371221191353268
          entropy_coeff: 0.009999999999999998
          kl: 0.034706176152112925
          policy_loss: 0.0459598381486204
          total_loss: 1.4399717860751682
          vf_explained_var: 0.2013709396123886
          vf_loss: 1.4013831595579782
    num_agent_steps_sampled: 527000
    num_agent_steps_trained: 527000
    num_steps_sampled: 527000
    num_steps_trained: 527000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,527,14148.6,527000,-25.648,-20.1,-34.7,256.48


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 528000
  custom_metrics: {}
  date: 2021-10-29_01-02-28
  done: false
  episode_len_mean: 257.08
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -25.708000000000098
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1776
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.031723511086302e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8580605685710907
          entropy_coeff: 0.009999999999999998
          kl: 0.007425569947066811
          policy_loss: -0.003969915790690316
          total_loss: 1.2300164692931705
          vf_explained_var: 0.2562607228755951
          vf_loss: 1.2425669941637252
    num_agent_steps_sampled: 528000
    num_agent_steps_trained: 528000
    num_steps_sampled: 528000
    num_steps_trained: 528000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,528,14176.4,528000,-25.708,-20.1,-34.7,257.08


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 529000
  custom_metrics: {}
  date: 2021-10-29_01-02-58
  done: false
  episode_len_mean: 257.69
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.7690000000001
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1780
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.031723511086302e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6784391257497999
          entropy_coeff: 0.009999999999999998
          kl: 0.011910263823865345
          policy_loss: 0.033277785446908736
          total_loss: 0.8812425110075209
          vf_explained_var: 0.5020896196365356
          vf_loss: 0.854749110672209
    num_agent_steps_sampled: 529000
    num_agent_steps_trained: 529000
    num_steps_sampled: 529000
    num_steps_trained: 529000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,529,14206.7,529000,-25.769,-21.3,-34.7,257.69


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 530000
  custom_metrics: {}
  date: 2021-10-29_01-03-27
  done: false
  episode_len_mean: 257.72
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.772000000000098
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1784
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.031723511086302e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9190181838141547
          entropy_coeff: 0.009999999999999998
          kl: 0.24665469902981477
          policy_loss: 0.10909997332427236
          total_loss: 1.438794254594379
          vf_explained_var: 0.34741541743278503
          vf_loss: 1.3388844476805792
    num_agent_steps_sampled: 530000
    num_agent_steps_trained: 530000
    num_steps_sampled: 530000
    num_steps_trained: 530000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,530,14235.4,530000,-25.772,-21.3,-34.7,257.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 531000
  custom_metrics: {}
  date: 2021-10-29_01-03-54
  done: false
  episode_len_mean: 257.46
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.746000000000098
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1788
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.047585266629454e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8379827207989163
          entropy_coeff: 0.009999999999999998
          kl: 0.008714160619533038
          policy_loss: 0.06077408782309956
          total_loss: 0.9478633956776725
          vf_explained_var: 0.4725257456302643
          vf_loss: 0.895469140013059
    num_agent_steps_sampled: 531000
    num_agent_steps_trained: 531000
    num_steps_sampled: 531000
    num_steps_trained: 531000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,531,14262.8,531000,-25.746,-21.3,-34.7,257.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 532000
  custom_metrics: {}
  date: 2021-10-29_01-04-22
  done: false
  episode_len_mean: 257.96
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.7960000000001
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1792
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.047585266629454e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8118448012404972
          entropy_coeff: 0.009999999999999998
          kl: 0.006274808703929994
          policy_loss: 0.04015752300620079
          total_loss: 0.8614036076598697
          vf_explained_var: 0.39555931091308594
          vf_loss: 0.8293645282586416
    num_agent_steps_sampled: 532000
    num_agent_steps_trained: 532000
    num_steps_sampled: 532000
    num_steps_trained: 532000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,532,14291,532000,-25.796,-21.3,-34.7,257.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 533000
  custom_metrics: {}
  date: 2021-10-29_01-04-52
  done: false
  episode_len_mean: 258.28
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.8280000000001
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1796
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.047585266629454e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8644787973827786
          entropy_coeff: 0.009999999999999998
          kl: 0.005012664689686927
          policy_loss: -0.004754600425561269
          total_loss: 0.8953240149550967
          vf_explained_var: 0.42866477370262146
          vf_loss: 0.9087233960628509
    num_agent_steps_sampled: 533000
    num_agent_steps_trained: 533000
    num_steps_sampled: 533000
    num_steps_trained: 533000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,533,14320.1,533000,-25.828,-21.3,-34.7,258.28




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 534000
  custom_metrics: {}
  date: 2021-10-29_01-05-36
  done: false
  episode_len_mean: 257.41
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.741000000000096
  episode_reward_min: -34.70000000000022
  episodes_this_iter: 4
  episodes_total: 1800
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.047585266629454e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7905185408062405
          entropy_coeff: 0.009999999999999998
          kl: 0.006759429979400069
          policy_loss: 0.06228859548767408
          total_loss: 0.9536116229163276
          vf_explained_var: 0.48874059319496155
          vf_loss: 0.8992282211780548
    num_agent_steps_sampled: 534000
    num_agent_steps_trained: 534000
    num_steps_sampled: 534000
    num_steps_trained: 534000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,534,14364.9,534000,-25.741,-21.3,-34.7,257.41


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 535000
  custom_metrics: {}
  date: 2021-10-29_01-06-04
  done: false
  episode_len_mean: 259.0
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.9000000000001
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 3
  episodes_total: 1803
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.047585266629454e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6794216997093625
          entropy_coeff: 0.009999999999999998
          kl: 0.010454932062568091
          policy_loss: 0.03965669415063328
          total_loss: 0.6618648015790516
          vf_explained_var: 0.10251256078481674
          vf_loss: 0.6290023213252425
    num_agent_steps_sampled: 535000
    num_agent_steps_trained: 535000
    num_steps_sampled: 535000
    num_steps_trained: 535000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,535,14392.4,535000,-25.9,-21.3,-37.6,259


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 536000
  custom_metrics: {}
  date: 2021-10-29_01-06-32
  done: false
  episode_len_mean: 258.14
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.814000000000096
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1807
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.047585266629454e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8350239091449314
          entropy_coeff: 0.009999999999999998
          kl: 0.007348827011261407
          policy_loss: 0.021205470710992814
          total_loss: 1.1306384980678559
          vf_explained_var: 0.25607267022132874
          vf_loss: 1.1177832735909357
    num_agent_steps_sampled: 536000
    num_agent_steps_trained: 536000
    num_steps_sampled: 536000
    num_steps_trained: 536000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,536,14420.9,536000,-25.814,-21.3,-37.6,258.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 537000
  custom_metrics: {}
  date: 2021-10-29_01-06-58
  done: false
  episode_len_mean: 259.97
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.997000000000092
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1811
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.047585266629454e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7444915460215674
          entropy_coeff: 0.009999999999999998
          kl: 0.010536742394775681
          policy_loss: -0.029853967991140155
          total_loss: 1.4555319666862487
          vf_explained_var: 0.12932269275188446
          vf_loss: 1.492830839422014
    num_agent_steps_sampled: 537000
    num_agent_steps_trained: 537000
    num_steps_sampled: 537000
    num_steps_trained: 537000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,537,14446.2,537000,-25.997,-21.3,-37.6,259.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 538000
  custom_metrics: {}
  date: 2021-10-29_01-07-25
  done: false
  episode_len_mean: 259.98
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.998000000000097
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 3
  episodes_total: 1814
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.047585266629454e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6699099633428786
          entropy_coeff: 0.009999999999999998
          kl: 0.005365161093890494
          policy_loss: 0.020134288403722975
          total_loss: 0.9174020876487096
          vf_explained_var: 0.2458924800157547
          vf_loss: 0.9039669002095858
    num_agent_steps_sampled: 538000
    num_agent_steps_trained: 538000
    num_steps_sampled: 538000
    num_steps_trained: 538000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,538,14473.3,538000,-25.998,-21.3,-37.6,259.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 539000
  custom_metrics: {}
  date: 2021-10-29_01-07-53
  done: false
  episode_len_mean: 259.94
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.994000000000096
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1818
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.047585266629454e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6265274786286884
          entropy_coeff: 0.009999999999999998
          kl: 0.009147069199547372
          policy_loss: 0.02315173927280638
          total_loss: 1.2052036722501118
          vf_explained_var: 0.34198465943336487
          vf_loss: 1.1883172061708238
    num_agent_steps_sampled: 539000
    num_agent_steps_trained: 539000
    num_steps_sampled: 539000
    num_steps_trained: 539000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,539,14500.9,539000,-25.994,-21.3,-37.6,259.94


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 540000
  custom_metrics: {}
  date: 2021-10-29_01-08-18
  done: false
  episode_len_mean: 260.0
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -26.0000000000001
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1822
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.047585266629454e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6808949662579431
          entropy_coeff: 0.009999999999999998
          kl: 0.004940836921579717
          policy_loss: 0.050136756069130366
          total_loss: 1.3262456489933863
          vf_explained_var: 0.15665267407894135
          vf_loss: 1.2829178485605452
    num_agent_steps_sampled: 540000
    num_agent_steps_trained: 540000
    num_steps_sampled: 540000
    num_steps_trained: 540000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,540,14526.5,540000,-26,-21.3,-37.6,260


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 541000
  custom_metrics: {}
  date: 2021-10-29_01-08-46
  done: false
  episode_len_mean: 259.98
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.998000000000097
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1826
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.023792633314727e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6304028842184278
          entropy_coeff: 0.009999999999999998
          kl: 0.003872454994864416
          policy_loss: 0.00963371636139022
          total_loss: 1.4415390756395128
          vf_explained_var: 0.2219676971435547
          vf_loss: 1.4382093840175205
    num_agent_steps_sampled: 541000
    num_agent_steps_trained: 541000
    num_steps_sampled: 541000
    num_steps_trained: 541000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,541,14554.8,541000,-25.998,-21.3,-37.6,259.98




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 542000
  custom_metrics: {}
  date: 2021-10-29_01-09-32
  done: false
  episode_len_mean: 258.21
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.821000000000094
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1830
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5118963166573634e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5440397093693415
          entropy_coeff: 0.009999999999999998
          kl: 0.0031938274736153477
          policy_loss: 0.06557881600326962
          total_loss: 1.2919213440683153
          vf_explained_var: 0.2955171465873718
          vf_loss: 1.2317829344007705
    num_agent_steps_sampled: 542000
    num_agent_steps_trained: 542000
    num_steps_sampled: 542000
    num_steps_trained: 542000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,542,14600.1,542000,-25.821,-21.3,-37.6,258.21


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 543000
  custom_metrics: {}
  date: 2021-10-29_01-10-03
  done: false
  episode_len_mean: 258.0
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.800000000000097
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1834
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.559481583286817e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5567415075169669
          entropy_coeff: 0.009999999999999998
          kl: 0.0030452920712039917
          policy_loss: 0.037122337023417155
          total_loss: 1.4895273115899827
          vf_explained_var: 0.26945623755455017
          vf_loss: 1.4579723742273119
    num_agent_steps_sampled: 543000
    num_agent_steps_trained: 543000
    num_steps_sampled: 543000
    num_steps_trained: 543000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,543,14631.7,543000,-25.8,-21.3,-37.6,258


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 544000
  custom_metrics: {}
  date: 2021-10-29_01-10-34
  done: false
  episode_len_mean: 257.93
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.7930000000001
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1838
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7797407916434085e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.73150714635849
          entropy_coeff: 0.009999999999999998
          kl: 0.006700649343887951
          policy_loss: 0.04756618738174438
          total_loss: 1.249557876586914
          vf_explained_var: 0.27541810274124146
          vf_loss: 1.2093067606290182
    num_agent_steps_sampled: 544000
    num_agent_steps_trained: 544000
    num_steps_sampled: 544000
    num_steps_trained: 544000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,544,14661.9,544000,-25.793,-21.3,-37.6,257.93


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 545000
  custom_metrics: {}
  date: 2021-10-29_01-11-01
  done: false
  episode_len_mean: 258.9
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.890000000000096
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1842
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7797407916434085e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8089474247561561
          entropy_coeff: 0.009999999999999998
          kl: 0.00429170951761664
          policy_loss: -0.015423028833336301
          total_loss: 1.8019001960754395
          vf_explained_var: 0.09088964760303497
          vf_loss: 1.8254127237531874
    num_agent_steps_sampled: 545000
    num_agent_steps_trained: 545000
    num_steps_sampled: 545000
    num_steps_trained: 545000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,545,14688.8,545000,-25.89,-21.3,-37.6,258.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 546000
  custom_metrics: {}
  date: 2021-10-29_01-11-30
  done: false
  episode_len_mean: 257.19
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.719000000000097
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1846
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8898703958217043e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.698851282066769
          entropy_coeff: 0.009999999999999998
          kl: 0.008932470439343337
          policy_loss: 0.05620979434914059
          total_loss: 1.1006466547648113
          vf_explained_var: 0.5306434035301208
          vf_loss: 1.0514253834883371
    num_agent_steps_sampled: 546000
    num_agent_steps_trained: 546000
    num_steps_sampled: 546000
    num_steps_trained: 546000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,546,14718,546000,-25.719,-21.3,-37.6,257.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 547000
  custom_metrics: {}
  date: 2021-10-29_01-11-55
  done: false
  episode_len_mean: 258.72
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.8720000000001
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 3
  episodes_total: 1849
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8898703958217043e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7947711957825555
          entropy_coeff: 0.009999999999999998
          kl: 0.020265496572394032
          policy_loss: -0.0836285317937533
          total_loss: 1.5688013792037965
          vf_explained_var: 0.213999405503273
          vf_loss: 1.6603776415189107
    num_agent_steps_sampled: 547000
    num_agent_steps_trained: 547000
    num_steps_sampled: 547000
    num_steps_trained: 547000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,547,14743.6,547000,-25.872,-21.3,-37.6,258.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 548000
  custom_metrics: {}
  date: 2021-10-29_01-12-24
  done: false
  episode_len_mean: 258.22
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.8220000000001
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1853
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8348055937325574e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6163934280474981
          entropy_coeff: 0.009999999999999998
          kl: 0.009262733711488775
          policy_loss: -0.029141599353816775
          total_loss: 1.0805707964632245
          vf_explained_var: 0.49599960446357727
          vf_loss: 1.1158763190110526
    num_agent_steps_sampled: 548000
    num_agent_steps_trained: 548000
    num_steps_sampled: 548000
    num_steps_trained: 548000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,548,14772.6,548000,-25.822,-21.3,-37.6,258.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 549000
  custom_metrics: {}
  date: 2021-10-29_01-12-52
  done: false
  episode_len_mean: 258.68
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.868000000000098
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1857
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8348055937325574e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6230437245633867
          entropy_coeff: 0.009999999999999998
          kl: 0.008066229883756491
          policy_loss: -0.009083162372310957
          total_loss: 1.048129177093506
          vf_explained_var: 0.42119264602661133
          vf_loss: 1.0634427666664124
    num_agent_steps_sampled: 549000
    num_agent_steps_trained: 549000
    num_steps_sampled: 549000
    num_steps_trained: 549000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,549,14799.8,549000,-25.868,-21.3,-37.6,258.68




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 550000
  custom_metrics: {}
  date: 2021-10-29_01-13-38
  done: false
  episode_len_mean: 256.95
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.695000000000096
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1861
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8348055937325574e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5359129296408759
          entropy_coeff: 0.009999999999999998
          kl: 0.004529641424514037
          policy_loss: -0.028573265092240438
          total_loss: 1.2134120848443772
          vf_explained_var: 0.3092213571071625
          vf_loss: 1.2473444938659668
    num_agent_steps_sampled: 550000
    num_agent_steps_trained: 550000
    num_steps_sampled: 550000
    num_steps_trained: 550000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,550,14846.7,550000,-25.695,-21.3,-37.6,256.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 551000
  custom_metrics: {}
  date: 2021-10-29_01-14-05
  done: false
  episode_len_mean: 258.12
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.812000000000097
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1865
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4174027968662787e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6668383830123478
          entropy_coeff: 0.009999999999999998
          kl: 0.008126414073913206
          policy_loss: 0.015379506183995141
          total_loss: 1.4589441763030158
          vf_explained_var: 0.1694711297750473
          vf_loss: 1.4502330621083577
    num_agent_steps_sampled: 551000
    num_agent_steps_trained: 551000
    num_steps_sampled: 551000
    num_steps_trained: 551000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,551,14872.7,551000,-25.812,-21.3,-37.6,258.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 552000
  custom_metrics: {}
  date: 2021-10-29_01-14-33
  done: false
  episode_len_mean: 258.25
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -25.825000000000095
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1869
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4174027968662787e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.585755145880911
          entropy_coeff: 0.009999999999999998
          kl: 0.006808520887577618
          policy_loss: -0.0061251786433988145
          total_loss: 1.180037729607688
          vf_explained_var: 0.3503720462322235
          vf_loss: 1.1920204520225526
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_steps_sampled: 552000
    num_steps_trained: 552000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,552,14901.3,552000,-25.825,-21.3,-37.6,258.25


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 553000
  custom_metrics: {}
  date: 2021-10-29_01-15-02
  done: false
  episode_len_mean: 258.23
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.82300000000009
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1873
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4174027968662787e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.4483383814493815
          entropy_coeff: 0.009999999999999998
          kl: 0.0021292521001959764
          policy_loss: 0.02733061636487643
          total_loss: 0.949335867828793
          vf_explained_var: 0.523370087146759
          vf_loss: 0.9264886352750991
    num_agent_steps_sampled: 553000
    num_agent_steps_trained: 553000
    num_steps_sampled: 553000
    num_steps_trained: 553000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,553,14929.8,553000,-25.823,-21.6,-37.6,258.23


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 554000
  custom_metrics: {}
  date: 2021-10-29_01-15-26
  done: false
  episode_len_mean: 259.29
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.9290000000001
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 3
  episodes_total: 1876
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.087013984331394e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7301945712831285
          entropy_coeff: 0.009999999999999998
          kl: 0.009952000849842099
          policy_loss: 0.02362236786219809
          total_loss: 1.1357330517636406
          vf_explained_var: -0.0911269262433052
          vf_loss: 1.1194126351012124
    num_agent_steps_sampled: 554000
    num_agent_steps_trained: 554000
    num_steps_sampled: 554000
    num_steps_trained: 554000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,554,14953.8,554000,-25.929,-21.6,-37.6,259.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 555000
  custom_metrics: {}
  date: 2021-10-29_01-15-50
  done: false
  episode_len_mean: 262.0
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.2000000000001
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 3
  episodes_total: 1879
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.087013984331394e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7193557355138991
          entropy_coeff: 0.009999999999999998
          kl: 0.007482105132522933
          policy_loss: 0.0569407989581426
          total_loss: 1.0261786371469497
          vf_explained_var: -0.185310497879982
          vf_loss: 0.9764314059582021
    num_agent_steps_sampled: 555000
    num_agent_steps_trained: 555000
    num_steps_sampled: 555000
    num_steps_trained: 555000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,555,14977.6,555000,-26.2,-21.6,-37.6,262


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 556000
  custom_metrics: {}
  date: 2021-10-29_01-16-16
  done: false
  episode_len_mean: 263.43
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.34300000000011
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1883
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.087013984331394e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7368610580762227
          entropy_coeff: 0.009999999999999998
          kl: 0.00524570232219964
          policy_loss: 0.004860483192735248
          total_loss: 1.7533973508410983
          vf_explained_var: 0.03608391061425209
          vf_loss: 1.755905470583174
    num_agent_steps_sampled: 556000
    num_agent_steps_trained: 556000
    num_steps_sampled: 556000
    num_steps_trained: 556000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,556,15003.6,556000,-26.343,-21.6,-37.6,263.43


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 557000
  custom_metrics: {}
  date: 2021-10-29_01-16-44
  done: false
  episode_len_mean: 263.66
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.36600000000011
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 3
  episodes_total: 1886
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.087013984331394e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5775694648424784
          entropy_coeff: 0.009999999999999998
          kl: 0.007061103974637146
          policy_loss: -0.0686463228530354
          total_loss: 1.4104849073621961
          vf_explained_var: 0.09618106484413147
          vf_loss: 1.484906914499071
    num_agent_steps_sampled: 557000
    num_agent_steps_trained: 557000
    num_steps_sampled: 557000
    num_steps_trained: 557000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,557,15031.9,557000,-26.366,-21.6,-37.6,263.66




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 558000
  custom_metrics: {}
  date: 2021-10-29_01-17-28
  done: false
  episode_len_mean: 264.18
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.41800000000011
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1890
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.087013984331394e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5987218032280605
          entropy_coeff: 0.009999999999999998
          kl: 0.0036892747756063216
          policy_loss: 0.03866226979427868
          total_loss: 1.5302783224317762
          vf_explained_var: 0.0096149742603302
          vf_loss: 1.4976032786899143
    num_agent_steps_sampled: 558000
    num_agent_steps_trained: 558000
    num_steps_sampled: 558000
    num_steps_trained: 558000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,558,15075.9,558000,-26.418,-21.6,-37.6,264.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 559000
  custom_metrics: {}
  date: 2021-10-29_01-17-58
  done: false
  episode_len_mean: 264.19
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.419000000000107
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 4
  episodes_total: 1894
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.543506992165697e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.4835339579317305
          entropy_coeff: 0.009999999999999998
          kl: 0.021410428503277776
          policy_loss: 0.017389640791548623
          total_loss: 1.6732499241828918
          vf_explained_var: 0.15474152565002441
          vf_loss: 1.6606956005096436
    num_agent_steps_sampled: 559000
    num_agent_steps_trained: 559000
    num_steps_sampled: 559000
    num_steps_trained: 559000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,559,15105.6,559000,-26.419,-21.6,-37.6,264.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 560000
  custom_metrics: {}
  date: 2021-10-29_01-18-23
  done: false
  episode_len_mean: 265.4
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.54000000000011
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 3
  episodes_total: 1897
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.315260488248543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9914035843478308
          entropy_coeff: 0.009999999999999998
          kl: 0.5036415213145845
          policy_loss: 0.02266515369216601
          total_loss: 1.9109705501132541
          vf_explained_var: 0.054961878806352615
          vf_loss: 1.8982194794548883
    num_agent_steps_sampled: 560000
    num_agent_steps_trained: 560000
    num_steps_sampled: 560000
    num_steps_trained: 560000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,560,15131.1,560000,-26.54,-21.6,-37.6,265.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 561000
  custom_metrics: {}
  date: 2021-10-29_01-18-46
  done: false
  episode_len_mean: 267.96
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.79600000000011
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 3
  episodes_total: 1900
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9206590824657016
          entropy_coeff: 0.009999999999999998
          kl: 0.019887060925848726
          policy_loss: -0.09094542082813051
          total_loss: 1.585825298892127
          vf_explained_var: 0.02738717384636402
          vf_loss: 1.685977323849996
    num_agent_steps_sampled: 561000
    num_agent_steps_trained: 561000
    num_steps_sampled: 561000
    num_steps_trained: 561000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,561,15154.1,561000,-26.796,-21.6,-37.6,267.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 562000
  custom_metrics: {}
  date: 2021-10-29_01-19-09
  done: false
  episode_len_mean: 268.85
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.885000000000108
  episode_reward_min: -35.90000000000024
  episodes_this_iter: 3
  episodes_total: 1903
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.913962236377928
          entropy_coeff: 0.009999999999999998
          kl: 0.00553905331798388
          policy_loss: -0.10471178657478757
          total_loss: 1.4562932835684883
          vf_explained_var: 0.0761999636888504
          vf_loss: 1.5701446957058376
    num_agent_steps_sampled: 562000
    num_agent_steps_trained: 562000
    num_steps_sampled: 562000
    num_steps_trained: 562000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,562,15176.6,562000,-26.885,-21.6,-35.9,268.85


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 563000
  custom_metrics: {}
  date: 2021-10-29_01-19-31
  done: false
  episode_len_mean: 270.55
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -27.05500000000011
  episode_reward_min: -35.90000000000024
  episodes_this_iter: 3
  episodes_total: 1906
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8554439650641548
          entropy_coeff: 0.009999999999999998
          kl: 0.007510506857449192
          policy_loss: -0.07687113285064698
          total_loss: 1.0841604510943095
          vf_explained_var: 0.06083926558494568
          vf_loss: 1.1695860193835363
    num_agent_steps_sampled: 563000
    num_agent_steps_trained: 563000
    num_steps_sampled: 563000
    num_steps_trained: 563000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,563,15199.1,563000,-27.055,-21.6,-35.9,270.55


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 564000
  custom_metrics: {}
  date: 2021-10-29_01-19-51
  done: false
  episode_len_mean: 273.5
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -27.350000000000115
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 3
  episodes_total: 1909
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.844409257835812
          entropy_coeff: 0.009999999999999998
          kl: 0.011620329457417464
          policy_loss: 0.05006150305271149
          total_loss: 0.872595528099272
          vf_explained_var: -0.0084400475025177
          vf_loss: 0.8309781149029731
    num_agent_steps_sampled: 564000
    num_agent_steps_trained: 564000
    num_steps_sampled: 564000
    num_steps_trained: 564000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,564,15219.1,564000,-27.35,-21.6,-46.5,273.5


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 565000
  custom_metrics: {}
  date: 2021-10-29_01-20-15
  done: false
  episode_len_mean: 274.39
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -27.439000000000124
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 3
  episodes_total: 1912
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7731789761119419
          entropy_coeff: 0.009999999999999998
          kl: 0.006150081015129061
          policy_loss: -0.11020433803399404
          total_loss: 1.5224100391070048
          vf_explained_var: 0.08215483278036118
          vf_loss: 1.6403461946381463
    num_agent_steps_sampled: 565000
    num_agent_steps_trained: 565000
    num_steps_sampled: 565000
    num_steps_trained: 565000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,565,15242.8,565000,-27.439,-21.6,-46.5,274.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 566000
  custom_metrics: {}
  date: 2021-10-29_01-20-39
  done: false
  episode_len_mean: 276.69
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -27.669000000000125
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 3
  episodes_total: 1915
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7028204931153191
          entropy_coeff: 0.009999999999999998
          kl: 0.007539133390471421
          policy_loss: -0.10241647147470051
          total_loss: 1.491897275712755
          vf_explained_var: 0.07438682019710541
          vf_loss: 1.6013419522179497
    num_agent_steps_sampled: 566000
    num_agent_steps_trained: 566000
    num_steps_sampled: 566000
    num_steps_trained: 566000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,566,15266.6,566000,-27.669,-21.6,-46.5,276.69


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 567000
  custom_metrics: {}
  date: 2021-10-29_01-21-00
  done: false
  episode_len_mean: 278.27
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -27.827000000000126
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 3
  episodes_total: 1918
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6930365284283956
          entropy_coeff: 0.009999999999999998
          kl: 0.009911022978134456
          policy_loss: -0.1083768602874544
          total_loss: 1.4577206863297356
          vf_explained_var: 0.08131518214941025
          vf_loss: 1.5730278982056511
    num_agent_steps_sampled: 567000
    num_agent_steps_trained: 567000
    num_steps_sampled: 567000
    num_steps_trained: 567000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,567,15288.2,567000,-27.827,-21.6,-46.5,278.27




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 568000
  custom_metrics: {}
  date: 2021-10-29_01-21-47
  done: false
  episode_len_mean: 279.15
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -27.915000000000123
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1922
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6240725815296173
          entropy_coeff: 0.009999999999999998
          kl: 0.00719349836513064
          policy_loss: -0.0024985387921333314
          total_loss: 1.482583937380049
          vf_explained_var: 0.12748970091342926
          vf_loss: 1.4913232101334466
    num_agent_steps_sampled: 568000
    num_agent_steps_trained: 568000
    num_steps_sampled: 568000
    num_steps_trained: 568000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,568,15334.8,568000,-27.915,-21.6,-46.5,279.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 569000
  custom_metrics: {}
  date: 2021-10-29_01-22-10
  done: false
  episode_len_mean: 280.04
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -28.00400000000013
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1926
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8319495413038466
          entropy_coeff: 0.009999999999999998
          kl: 0.007563781085105633
          policy_loss: 0.027916593849658965
          total_loss: 1.543051572640737
          vf_explained_var: 0.062144652009010315
          vf_loss: 1.52345449924469
    num_agent_steps_sampled: 569000
    num_agent_steps_trained: 569000
    num_steps_sampled: 569000
    num_steps_trained: 569000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,569,15358.2,569000,-28.004,-21.6,-46.5,280.04


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 570000
  custom_metrics: {}
  date: 2021-10-29_01-22-33
  done: false
  episode_len_mean: 283.03
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -28.303000000000132
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 3
  episodes_total: 1929
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8124768396218618
          entropy_coeff: 0.009999999999999998
          kl: 0.01946077494590964
          policy_loss: 0.05450328505701489
          total_loss: 0.9955402036507924
          vf_explained_var: 0.048764001578092575
          vf_loss: 0.949161685589287
    num_agent_steps_sampled: 570000
    num_agent_steps_trained: 570000
    num_steps_sampled: 570000
    num_steps_trained: 570000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,570,15380.4,570000,-28.303,-21.6,-46.5,283.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 571000
  custom_metrics: {}
  date: 2021-10-29_01-22-57
  done: false
  episode_len_mean: 285.25
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -28.525000000000137
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 3
  episodes_total: 1932
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7775771041711171
          entropy_coeff: 0.009999999999999998
          kl: 0.004840952864005496
          policy_loss: 0.05590709025661151
          total_loss: 0.9741494721836514
          vf_explained_var: 0.08220301568508148
          vf_loss: 0.926018161740568
    num_agent_steps_sampled: 571000
    num_agent_steps_trained: 571000
    num_steps_sampled: 571000
    num_steps_trained: 571000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,571,15404.9,571000,-28.525,-21.6,-46.5,285.25


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 572000
  custom_metrics: {}
  date: 2021-10-29_01-23-25
  done: false
  episode_len_mean: 285.47
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -28.547000000000136
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1936
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.986445366186408e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6169721341795391
          entropy_coeff: 0.009999999999999998
          kl: 0.005214862279376812
          policy_loss: 0.010106733855273988
          total_loss: 1.5847336226039463
          vf_explained_var: 0.08749239891767502
          vf_loss: 1.5807966219054328
    num_agent_steps_sampled: 572000
    num_agent_steps_trained: 572000
    num_steps_sampled: 572000
    num_steps_trained: 572000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,572,15432.9,572000,-28.547,-21.6,-46.5,285.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 573000
  custom_metrics: {}
  date: 2021-10-29_01-23-51
  done: false
  episode_len_mean: 286.12
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -28.612000000000133
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 3
  episodes_total: 1939
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.986445366186408e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.6124821719196107
          entropy_coeff: 0.009999999999999998
          kl: 0.0043548539422793115
          policy_loss: -0.10788963387409846
          total_loss: 1.3897230664889018
          vf_explained_var: 0.07543708384037018
          vf_loss: 1.5037375171979268
    num_agent_steps_sampled: 573000
    num_agent_steps_trained: 573000
    num_steps_sampled: 573000
    num_steps_trained: 573000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,573,15459,573000,-28.612,-21.6,-46.5,286.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 574000
  custom_metrics: {}
  date: 2021-10-29_01-24-20
  done: false
  episode_len_mean: 286.03
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -28.603000000000133
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1943
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.993222683093204e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5481600181923972
          entropy_coeff: 0.009999999999999998
          kl: 0.0030317466963212937
          policy_loss: -0.075684345430798
          total_loss: 1.5123450954755147
          vf_explained_var: 0.09077401459217072
          vf_loss: 1.5935110688209533
    num_agent_steps_sampled: 574000
    num_agent_steps_trained: 574000
    num_steps_sampled: 574000
    num_steps_trained: 574000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,574,15488,574000,-28.603,-21.6,-46.5,286.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 575000
  custom_metrics: {}
  date: 2021-10-29_01-24-47
  done: false
  episode_len_mean: 286.4
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -28.640000000000136
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1947
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.96611341546602e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6097440689802169
          entropy_coeff: 0.009999999999999998
          kl: 0.004785955833051306
          policy_loss: 0.029897396432028878
          total_loss: 1.1332990301979913
          vf_explained_var: 0.2552022635936737
          vf_loss: 1.1094990889231364
    num_agent_steps_sampled: 575000
    num_agent_steps_trained: 575000
    num_steps_sampled: 575000
    num_steps_trained: 575000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,575,15515,575000,-28.64,-21.6,-46.5,286.4




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 576000
  custom_metrics: {}
  date: 2021-10-29_01-25-33
  done: false
  episode_len_mean: 285.22
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -28.52200000000014
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1951
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.98305670773301e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5308014879624049
          entropy_coeff: 0.009999999999999998
          kl: 0.004655338449751017
          policy_loss: 0.025740825964344873
          total_loss: 1.2067374613549975
          vf_explained_var: 0.3989051282405853
          vf_loss: 1.1863046520286136
    num_agent_steps_sampled: 576000
    num_agent_steps_trained: 576000
    num_steps_sampled: 576000
    num_steps_trained: 576000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,576,15560.4,576000,-28.522,-21.6,-46.5,285.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 577000
  custom_metrics: {}
  date: 2021-10-29_01-26-02
  done: false
  episode_len_mean: 284.97
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -28.49700000000014
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1955
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.491528353866505e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5803316871325175
          entropy_coeff: 0.009999999999999998
          kl: 0.01001726486243266
          policy_loss: -0.002108519689904319
          total_loss: 1.2768784814410739
          vf_explained_var: 0.21694202721118927
          vf_loss: 1.284790329138438
    num_agent_steps_sampled: 577000
    num_agent_steps_trained: 577000
    num_steps_sampled: 577000
    num_steps_trained: 577000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,577,15589.8,577000,-28.497,-21.6,-46.5,284.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 578000
  custom_metrics: {}
  date: 2021-10-29_01-26-29
  done: false
  episode_len_mean: 284.86
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -28.486000000000132
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1959
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.491528353866505e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6149820215172238
          entropy_coeff: 0.009999999999999998
          kl: 0.045047359066367594
          policy_loss: 0.021399466196695964
          total_loss: 1.1374891102313995
          vf_explained_var: 0.4106714129447937
          vf_loss: 1.122239468495051
    num_agent_steps_sampled: 578000
    num_agent_steps_trained: 578000
    num_steps_sampled: 578000
    num_steps_trained: 578000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,578,15616.4,578000,-28.486,-21.6,-46.5,284.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 579000
  custom_metrics: {}
  date: 2021-10-29_01-26-55
  done: false
  episode_len_mean: 285.62
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.562000000000136
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1963
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.737292530799757e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6942756772041321
          entropy_coeff: 0.009999999999999998
          kl: 0.009586979415318768
          policy_loss: 0.019760129435194864
          total_loss: 1.432890240351359
          vf_explained_var: 0.14831294119358063
          vf_loss: 1.420072857538859
    num_agent_steps_sampled: 579000
    num_agent_steps_trained: 579000
    num_steps_sampled: 579000
    num_steps_trained: 579000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,579,15642.1,579000,-28.562,-21.8,-46.5,285.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 580000
  custom_metrics: {}
  date: 2021-10-29_01-27-22
  done: false
  episode_len_mean: 285.26
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.526000000000135
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 3
  episodes_total: 1966
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.737292530799757e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.666740937365426
          entropy_coeff: 0.009999999999999998
          kl: 0.00733977418906342
          policy_loss: -0.12022520866658952
          total_loss: 0.988629819949468
          vf_explained_var: 0.4125955402851105
          vf_loss: 1.115522434976366
    num_agent_steps_sampled: 580000
    num_agent_steps_trained: 580000
    num_steps_sampled: 580000
    num_steps_trained: 580000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,580,15669.2,580000,-28.526,-21.8,-46.5,285.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 581000
  custom_metrics: {}
  date: 2021-10-29_01-27-46
  done: false
  episode_len_mean: 287.07
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.707000000000136
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1970
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.737292530799757e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8344315807024638
          entropy_coeff: 0.009999999999999998
          kl: 0.013116386734616844
          policy_loss: 0.015163772387637033
          total_loss: 1.5171179360813565
          vf_explained_var: 0.08070451766252518
          vf_loss: 1.5102984799279107
    num_agent_steps_sampled: 581000
    num_agent_steps_trained: 581000
    num_steps_sampled: 581000
    num_steps_trained: 581000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,581,15693.9,581000,-28.707,-21.8,-46.5,287.07


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 582000
  custom_metrics: {}
  date: 2021-10-29_01-28-14
  done: false
  episode_len_mean: 287.19
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.719000000000143
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 3
  episodes_total: 1973
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.737292530799757e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6589241140418582
          entropy_coeff: 0.009999999999999998
          kl: 0.009045054879364045
          policy_loss: -0.12318830788135529
          total_loss: 1.1146649989816877
          vf_explained_var: 0.3706305921077728
          vf_loss: 1.2444425582885743
    num_agent_steps_sampled: 582000
    num_agent_steps_trained: 582000
    num_steps_sampled: 582000
    num_steps_trained: 582000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,582,15721,582000,-28.719,-21.8,-46.5,287.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 583000
  custom_metrics: {}
  date: 2021-10-29_01-28-41
  done: false
  episode_len_mean: 285.88
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.58800000000014
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1977
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.737292530799757e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7185960398779975
          entropy_coeff: 0.009999999999999998
          kl: 0.004355120257750607
          policy_loss: -0.022196512834893333
          total_loss: 1.3106320169236925
          vf_explained_var: 0.25522398948669434
          vf_loss: 1.3400144961145188
    num_agent_steps_sampled: 583000
    num_agent_steps_trained: 583000
    num_steps_sampled: 583000
    num_steps_trained: 583000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,583,15748.6,583000,-28.588,-21.8,-46.5,285.88




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 584000
  custom_metrics: {}
  date: 2021-10-29_01-29-29
  done: false
  episode_len_mean: 283.68
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.368000000000134
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1981
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8686462653998786e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7615043474568262
          entropy_coeff: 0.009999999999999998
          kl: 0.013665150979365572
          policy_loss: -0.01531491842534807
          total_loss: 1.1185914860831367
          vf_explained_var: 0.37050309777259827
          vf_loss: 1.141521453195148
    num_agent_steps_sampled: 584000
    num_agent_steps_trained: 584000
    num_steps_sampled: 584000
    num_steps_trained: 584000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,584,15796,584000,-28.368,-21.8,-46.5,283.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 585000
  custom_metrics: {}
  date: 2021-10-29_01-29-58
  done: false
  episode_len_mean: 282.76
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.276000000000128
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1985
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8686462653998786e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7184020386801826
          entropy_coeff: 0.009999999999999998
          kl: 0.011863071848801275
          policy_loss: -0.11971241997347938
          total_loss: 1.3195430682765112
          vf_explained_var: 0.4572891294956207
          vf_loss: 1.446439508597056
    num_agent_steps_sampled: 585000
    num_agent_steps_trained: 585000
    num_steps_sampled: 585000
    num_steps_trained: 585000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,585,15825,585000,-28.276,-21.8,-46.5,282.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 586000
  custom_metrics: {}
  date: 2021-10-29_01-30-27
  done: false
  episode_len_mean: 281.5
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -28.150000000000134
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 5
  episodes_total: 1990
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8686462653998786e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6184708671437369
          entropy_coeff: 0.009999999999999998
          kl: 0.07532761872251652
          policy_loss: 0.013100725081231859
          total_loss: 1.9060214506255255
          vf_explained_var: 0.3277680575847626
          vf_loss: 1.8991054111056858
    num_agent_steps_sampled: 586000
    num_agent_steps_trained: 586000
    num_steps_sampled: 586000
    num_steps_trained: 586000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,586,15854.5,586000,-28.15,-22,-46.5,281.5


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 587000
  custom_metrics: {}
  date: 2021-10-29_01-30-59
  done: false
  episode_len_mean: 280.1
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -28.010000000000122
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1994
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.802969398099818e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5499357693725162
          entropy_coeff: 0.009999999999999998
          kl: 0.0031400257260963196
          policy_loss: 0.052584149398737484
          total_loss: 1.375282449192471
          vf_explained_var: 0.1913924366235733
          vf_loss: 1.3281976660092671
    num_agent_steps_sampled: 587000
    num_agent_steps_trained: 587000
    num_steps_sampled: 587000
    num_steps_trained: 587000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,587,15886.1,587000,-28.01,-21.7,-46.5,280.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 588000
  custom_metrics: {}
  date: 2021-10-29_01-31-28
  done: false
  episode_len_mean: 277.88
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -27.78800000000012
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 1998
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.401484699049909e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6887138267358144
          entropy_coeff: 0.009999999999999998
          kl: 0.004372760345017134
          policy_loss: -0.03304339353409078
          total_loss: 1.3420273277494643
          vf_explained_var: 0.3459928035736084
          vf_loss: 1.3819578674104478
    num_agent_steps_sampled: 588000
    num_agent_steps_trained: 588000
    num_steps_sampled: 588000
    num_steps_trained: 588000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,588,15915.3,588000,-27.788,-21.7,-46.5,277.88


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 589000
  custom_metrics: {}
  date: 2021-10-29_01-32-00
  done: false
  episode_len_mean: 272.33
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -27.233000000000114
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 5
  episodes_total: 2003
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.007423495249545e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.5245134601990382
          entropy_coeff: 0.009999999999999998
          kl: 0.011269765819544059
          policy_loss: -0.0347454326848189
          total_loss: 2.00507335530387
          vf_explained_var: -0.03474796190857887
          vf_loss: 2.0450639128684998
    num_agent_steps_sampled: 589000
    num_agent_steps_trained: 589000
    num_steps_sampled: 589000
    num_steps_trained: 589000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,589,15946.9,589000,-27.233,-21.7,-46.5,272.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 590000
  custom_metrics: {}
  date: 2021-10-29_01-32-31
  done: false
  episode_len_mean: 268.04
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -26.80400000000012
  episode_reward_min: -46.50000000000039
  episodes_this_iter: 4
  episodes_total: 2007
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.007423495249545e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.41684528357452816
          entropy_coeff: 0.009999999999999998
          kl: 0.0021616773041671423
          policy_loss: -0.05040856065849463
          total_loss: 1.2480663922097948
          vf_explained_var: 0.19485585391521454
          vf_loss: 1.302643398443858
    num_agent_steps_sampled: 590000
    num_agent_steps_trained: 590000
    num_steps_sampled: 590000
    num_steps_trained: 590000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,590,15978.2,590000,-26.804,-21.7,-46.5,268.04




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 591000
  custom_metrics: {}
  date: 2021-10-29_01-33-20
  done: false
  episode_len_mean: 262.63
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.263000000000105
  episode_reward_min: -44.30000000000036
  episodes_this_iter: 5
  episodes_total: 2012
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5037117476247723e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.4165947374370363
          entropy_coeff: 0.009999999999999998
          kl: 0.004265965179926291
          policy_loss: -0.003480037881268395
          total_loss: 1.674177689022488
          vf_explained_var: 0.2685765027999878
          vf_loss: 1.6818236728509268
    num_agent_steps_sampled: 591000
    num_agent_steps_trained: 591000
    num_steps_sampled: 591000
    num_steps_trained: 591000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,591,16027,591000,-26.263,-19.3,-44.3,262.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 592000
  custom_metrics: {}
  date: 2021-10-29_01-33-52
  done: false
  episode_len_mean: 257.96
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -25.796000000000095
  episode_reward_min: -39.60000000000029
  episodes_this_iter: 4
  episodes_total: 2016
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7518558738123861e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.3736957584818204
          entropy_coeff: 0.009999999999999998
          kl: 0.0016758511671808504
          policy_loss: -0.01791568762726254
          total_loss: 1.4394832134246827
          vf_explained_var: 0.11209984868764877
          vf_loss: 1.4611358708805509
    num_agent_steps_sampled: 592000
    num_agent_steps_trained: 592000
    num_steps_sampled: 592000
    num_steps_trained: 592000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,592,16058.9,592000,-25.796,-19.3,-39.6,257.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 593000
  custom_metrics: {}
  date: 2021-10-29_01-34-24
  done: false
  episode_len_mean: 253.63
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -25.36300000000009
  episode_reward_min: -39.60000000000029
  episodes_this_iter: 5
  episodes_total: 2021
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.759279369061931e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.32860597405168745
          entropy_coeff: 0.009999999999999998
          kl: 0.0018587001847587695
          policy_loss: -0.015962729189130994
          total_loss: 1.8031413224008348
          vf_explained_var: 0.16381044685840607
          vf_loss: 1.8223901006910537
    num_agent_steps_sampled: 593000
    num_agent_steps_trained: 593000
    num_steps_sampled: 593000
    num_steps_trained: 593000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,593,16091.1,593000,-25.363,-19.3,-39.6,253.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 594000
  custom_metrics: {}
  date: 2021-10-29_01-34-54
  done: false
  episode_len_mean: 251.94
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -25.19400000000009
  episode_reward_min: -39.60000000000029
  episodes_this_iter: 4
  episodes_total: 2025
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3796396845309654e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.6374261058039136
          entropy_coeff: 0.009999999999999998
          kl: 0.005694149535354774
          policy_loss: 0.027609074695242777
          total_loss: 0.9555071135361989
          vf_explained_var: 0.4860442280769348
          vf_loss: 0.9342722958988614
    num_agent_steps_sampled: 594000
    num_agent_steps_trained: 594000
    num_steps_sampled: 594000
    num_steps_trained: 594000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,594,16121.1,594000,-25.194,-19.3,-39.6,251.94


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 595000
  custom_metrics: {}
  date: 2021-10-29_01-35-26
  done: false
  episode_len_mean: 246.84
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -24.68400000000008
  episode_reward_min: -33.3000000000002
  episodes_this_iter: 5
  episodes_total: 2030
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3796396845309654e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.23728444145785438
          entropy_coeff: 0.009999999999999998
          kl: 0.0020513764531464238
          policy_loss: -0.02399107524090343
          total_loss: 1.8751477148797777
          vf_explained_var: 0.2100895494222641
          vf_loss: 1.9015116161770291
    num_agent_steps_sampled: 595000
    num_agent_steps_trained: 595000
    num_steps_sampled: 595000
    num_steps_trained: 595000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,595,16153,595000,-24.684,-19.3,-33.3,246.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 596000
  custom_metrics: {}
  date: 2021-10-29_01-35-56
  done: false
  episode_len_mean: 244.44
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -24.444000000000077
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 4
  episodes_total: 2034
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1898198422654827e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.2157437675529056
          entropy_coeff: 0.009999999999999998
          kl: 0.0016425973617869103
          policy_loss: 0.0344990671508842
          total_loss: 1.355836072232988
          vf_explained_var: 0.0996369868516922
          vf_loss: 1.3234944257471297
    num_agent_steps_sampled: 596000
    num_agent_steps_trained: 596000
    num_steps_sampled: 596000
    num_steps_trained: 596000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,596,16183.6,596000,-24.444,-19.3,-32.5,244.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 597000
  custom_metrics: {}
  date: 2021-10-29_01-36-28
  done: false
  episode_len_mean: 242.28
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -24.22800000000007
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 5
  episodes_total: 2039
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0949099211327413e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.2840823279486762
          entropy_coeff: 0.009999999999999998
          kl: 0.00035319143466649744
          policy_loss: -0.014209125108189053
          total_loss: 1.8885468575689528
          vf_explained_var: 0.15445560216903687
          vf_loss: 1.9055968311097886
    num_agent_steps_sampled: 597000
    num_agent_steps_trained: 597000
    num_steps_sampled: 597000
    num_steps_trained: 59700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,597,16215.6,597000,-24.228,-19.3,-32.5,242.28




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 598000
  custom_metrics: {}
  date: 2021-10-29_01-37-19
  done: false
  episode_len_mean: 240.18
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -24.01800000000007
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 4
  episodes_total: 2043
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.474549605663707e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.2079879793855879
          entropy_coeff: 0.009999999999999998
          kl: 0.0004978546504687703
          policy_loss: -0.03126236299673716
          total_loss: 1.4502969159020318
          vf_explained_var: 0.062369924038648605
          vf_loss: 1.4836391475465562
    num_agent_steps_sampled: 598000
    num_agent_steps_trained: 598000
    num_steps_sampled: 598000
    num_steps_trained: 598000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,598,16266.6,598000,-24.018,-18.9,-32.5,240.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 599000
  custom_metrics: {}
  date: 2021-10-29_01-37-51
  done: false
  episode_len_mean: 238.19
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.81900000000007
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 5
  episodes_total: 2048
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7372748028318534e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.2611785343951649
          entropy_coeff: 0.009999999999999998
          kl: 0.0009608307190859478
          policy_loss: -0.0007426864571041531
          total_loss: 1.789644111527337
          vf_explained_var: 0.12471247464418411
          vf_loss: 1.7929985920588176
    num_agent_steps_sampled: 599000
    num_agent_steps_trained: 599000
    num_steps_sampled: 599000
    num_steps_trained: 599000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,599,16298.3,599000,-23.819,-18.9,-32.5,238.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 600000
  custom_metrics: {}
  date: 2021-10-29_01-38-23
  done: false
  episode_len_mean: 236.44
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.644000000000066
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 5
  episodes_total: 2053
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3686374014159267e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.20177602387136884
          entropy_coeff: 0.009999999999999998
          kl: 0.0014131624034701832
          policy_loss: 0.010947379966576893
          total_loss: 1.610379195213318
          vf_explained_var: 0.039110030978918076
          vf_loss: 1.601449598206414
    num_agent_steps_sampled: 600000
    num_agent_steps_trained: 600000
    num_steps_sampled: 600000
    num_steps_trained: 600000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,600,16330.3,600000,-23.644,-18.9,-32.5,236.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 601000
  custom_metrics: {}
  date: 2021-10-29_01-38-55
  done: false
  episode_len_mean: 235.1
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.51000000000006
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 4
  episodes_total: 2057
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.8431870070796334e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.2421166721317503
          entropy_coeff: 0.009999999999999998
          kl: 0.00116216238780156
          policy_loss: 0.023833554900354808
          total_loss: 1.4167349020640054
          vf_explained_var: 0.05826835706830025
          vf_loss: 1.3953225162294176
    num_agent_steps_sampled: 601000
    num_agent_steps_trained: 601000
    num_steps_sampled: 601000
    num_steps_trained: 601000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,601,16361.9,601000,-23.51,-18.9,-32.5,235.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 602000
  custom_metrics: {}
  date: 2021-10-29_01-39-27
  done: false
  episode_len_mean: 232.92
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.29200000000005
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 5
  episodes_total: 2062
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4215935035398167e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.20968659884399837
          entropy_coeff: 0.009999999999999998
          kl: 0.000430467472620598
          policy_loss: -0.013395939767360688
          total_loss: 1.683731398317549
          vf_explained_var: 0.16902901232242584
          vf_loss: 1.6992242217063904
    num_agent_steps_sampled: 602000
    num_agent_steps_trained: 602000
    num_steps_sampled: 602000
    num_steps_trained: 602000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,602,16393.9,602000,-23.292,-18.9,-31.1,232.92


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 603000
  custom_metrics: {}
  date: 2021-10-29_01-39-59
  done: false
  episode_len_mean: 231.03
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -23.103000000000055
  episode_reward_min: -31.100000000000172
  episodes_this_iter: 4
  episodes_total: 2066
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7107967517699083e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.1832592515481843
          entropy_coeff: 0.009999999999999998
          kl: 0.0011287627082189407
          policy_loss: 0.0353521182305283
          total_loss: 1.3562531603707209
          vf_explained_var: 0.10433769971132278
          vf_loss: 1.3227336366971334
    num_agent_steps_sampled: 603000
    num_agent_steps_trained: 603000
    num_steps_sampled: 603000
    num_steps_trained: 603000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,603,16426.2,603000,-23.103,-18.9,-31.1,231.03




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 604000
  custom_metrics: {}
  date: 2021-10-29_01-40-49
  done: false
  episode_len_mean: 227.26
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.72600000000006
  episode_reward_min: -30.30000000000016
  episodes_this_iter: 5
  episodes_total: 2071
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.553983758849542e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.16792659196588727
          entropy_coeff: 0.009999999999999998
          kl: 0.0009077148115663488
          policy_loss: -0.002635783122645484
          total_loss: 1.5986895574463738
          vf_explained_var: 0.22635550796985626
          vf_loss: 1.6030046224594117
    num_agent_steps_sampled: 604000
    num_agent_steps_trained: 604000
    num_steps_sampled: 604000
    num_steps_trained: 604000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,604,16475.8,604000,-22.726,-18.9,-30.3,227.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 605000
  custom_metrics: {}
  date: 2021-10-29_01-41-21
  done: false
  episode_len_mean: 225.03
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.503000000000053
  episode_reward_min: -27.700000000000124
  episodes_this_iter: 4
  episodes_total: 2075
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.276991879424771e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.19959126694334878
          entropy_coeff: 0.009999999999999998
          kl: 0.0006954175873430143
          policy_loss: -0.005364697012636397
          total_loss: 1.5667758517795138
          vf_explained_var: 0.11377472430467606
          vf_loss: 1.5741364690992568
    num_agent_steps_sampled: 605000
    num_agent_steps_trained: 605000
    num_steps_sampled: 605000
    num_steps_trained: 605000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,605,16507.7,605000,-22.503,-18.9,-27.7,225.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 606000
  custom_metrics: {}
  date: 2021-10-29_01-41-53
  done: false
  episode_len_mean: 223.41
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.341000000000044
  episode_reward_min: -26.500000000000107
  episodes_this_iter: 5
  episodes_total: 2080
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1384959397123854e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.21045168803797829
          entropy_coeff: 0.009999999999999998
          kl: 0.0013255947988919084
          policy_loss: -0.033440936439567144
          total_loss: 1.8627931396166484
          vf_explained_var: 0.15959198772907257
          vf_loss: 1.89833861456977
    num_agent_steps_sampled: 606000
    num_agent_steps_trained: 606000
    num_steps_sampled: 606000
    num_steps_trained: 606000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,606,16539.6,606000,-22.341,-18.9,-26.5,223.41


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 607000
  custom_metrics: {}
  date: 2021-10-29_01-42-25
  done: false
  episode_len_mean: 222.12
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.21200000000005
  episode_reward_min: -26.500000000000107
  episodes_this_iter: 5
  episodes_total: 2085
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0692479698561927e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.20149514890379375
          entropy_coeff: 0.009999999999999998
          kl: 0.0011402634111844872
          policy_loss: -0.021497609631882773
          total_loss: 1.6243641720877753
          vf_explained_var: 0.033321183174848557
          vf_loss: 1.6478767289055718
    num_agent_steps_sampled: 607000
    num_agent_steps_trained: 607000
    num_steps_sampled: 607000
    num_steps_trained: 607000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,607,16572,607000,-22.212,-18.9,-26.5,222.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 608000
  custom_metrics: {}
  date: 2021-10-29_01-42-57
  done: false
  episode_len_mean: 221.36
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.136000000000053
  episode_reward_min: -26.500000000000107
  episodes_this_iter: 4
  episodes_total: 2089
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3462398492809636e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.2283031102683809
          entropy_coeff: 0.009999999999999998
          kl: 0.0007588643343473854
          policy_loss: 0.06579265230231815
          total_loss: 0.9921469516224332
          vf_explained_var: 0.059474628418684006
          vf_loss: 0.928637315498458
    num_agent_steps_sampled: 608000
    num_agent_steps_trained: 608000
    num_steps_sampled: 608000
    num_steps_trained: 608000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,608,16604.2,608000,-22.136,-18.9,-26.5,221.36


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 609000
  custom_metrics: {}
  date: 2021-10-29_01-43-29
  done: false
  episode_len_mean: 220.84
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.084000000000046
  episode_reward_min: -26.500000000000107
  episodes_this_iter: 5
  episodes_total: 2094
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6731199246404818e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.1761880624625418
          entropy_coeff: 0.009999999999999998
          kl: 0.0011274901186418952
          policy_loss: -0.009649966408809026
          total_loss: 1.662232099639045
          vf_explained_var: 0.11550482362508774
          vf_loss: 1.673643950621287
    num_agent_steps_sampled: 609000
    num_agent_steps_trained: 609000
    num_steps_sampled: 609000
    num_steps_trained: 609000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,609,16635.6,609000,-22.084,-18.9,-26.5,220.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 610000
  custom_metrics: {}
  date: 2021-10-29_01-44-01
  done: false
  episode_len_mean: 220.34
  episode_media: {}
  episode_reward_max: -18.9
  episode_reward_mean: -22.03400000000004
  episode_reward_min: -26.1000000000001
  episodes_this_iter: 4
  episodes_total: 2098
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3365599623202409e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.19515891869862875
          entropy_coeff: 0.009999999999999998
          kl: 0.00037052675149264284
          policy_loss: 0.029223568820291094
          total_loss: 1.3801160004403856
          vf_explained_var: 0.09780661016702652
          vf_loss: 1.352844026353624
    num_agent_steps_sampled: 610000
    num_agent_steps_trained: 610000
    num_steps_sampled: 610000
    num_steps_trained: 610000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,610,16667.6,610000,-22.034,-18.9,-26.1,220.34




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 611000
  custom_metrics: {}
  date: 2021-10-29_01-44-51
  done: false
  episode_len_mean: 219.79
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.97900000000004
  episode_reward_min: -26.1000000000001
  episodes_this_iter: 5
  episodes_total: 2103
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.6827998116012045e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.15443196727169883
          entropy_coeff: 0.009999999999999998
          kl: 0.0009376437377622122
          policy_loss: -0.004439856691492929
          total_loss: 1.744202368789249
          vf_explained_var: 0.12701740860939026
          vf_loss: 1.7501865532663134
    num_agent_steps_sampled: 611000
    num_agent_steps_trained: 611000
    num_steps_sampled: 611000
    num_steps_trained: 611000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,611,16717.8,611000,-21.979,-18.7,-26.1,219.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 612000
  custom_metrics: {}
  date: 2021-10-29_01-45-23
  done: false
  episode_len_mean: 219.31
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.93100000000004
  episode_reward_min: -25.200000000000088
  episodes_this_iter: 5
  episodes_total: 2108
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3413999058006022e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.19796843710872863
          entropy_coeff: 0.009999999999999998
          kl: 0.0005968849673107969
          policy_loss: 0.041637531336810855
          total_loss: 1.4939988599883185
          vf_explained_var: 0.0757981538772583
          vf_loss: 1.4543409996562533
    num_agent_steps_sampled: 612000
    num_agent_steps_trained: 612000
    num_steps_sampled: 612000
    num_steps_trained: 612000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,612,16750.3,612000,-21.931,-18.7,-25.2,219.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 613000
  custom_metrics: {}
  date: 2021-10-29_01-45-56
  done: false
  episode_len_mean: 219.32
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.932000000000034
  episode_reward_min: -25.200000000000088
  episodes_this_iter: 4
  episodes_total: 2112
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6706999529003011e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.23382427444060644
          entropy_coeff: 0.009999999999999998
          kl: 0.010356029301510953
          policy_loss: 0.033626571545998254
          total_loss: 1.2719832049475777
          vf_explained_var: 0.1616249978542328
          vf_loss: 1.2406948804855347
    num_agent_steps_sampled: 613000
    num_agent_steps_trained: 613000
    num_steps_sampled: 613000
    num_steps_trained: 613000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,613,16782.7,613000,-21.932,-18.7,-25.2,219.32


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 614000
  custom_metrics: {}
  date: 2021-10-29_01-46-27
  done: false
  episode_len_mean: 219.56
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.95600000000004
  episode_reward_min: -25.200000000000088
  episodes_this_iter: 5
  episodes_total: 2117
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6706999529003011e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.2910308352775044
          entropy_coeff: 0.009999999999999998
          kl: 0.0014994176358290663
          policy_loss: -0.00393727935022778
          total_loss: 1.6693920267952813
          vf_explained_var: 0.11274290084838867
          vf_loss: 1.6762396070692274
    num_agent_steps_sampled: 614000
    num_agent_steps_trained: 614000
    num_steps_sampled: 614000
    num_steps_trained: 614000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,614,16813.6,614000,-21.956,-18.7,-25.2,219.56


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 615000
  custom_metrics: {}
  date: 2021-10-29_01-46-59
  done: false
  episode_len_mean: 219.63
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.963000000000044
  episode_reward_min: -25.200000000000088
  episodes_this_iter: 4
  episodes_total: 2121
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.353499764501506e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.22092187073495653
          entropy_coeff: 0.009999999999999998
          kl: 0.0018875537190811858
          policy_loss: 0.02939230911433697
          total_loss: 1.1881435950597128
          vf_explained_var: 0.10161534696817398
          vf_loss: 1.1609605034192403
    num_agent_steps_sampled: 615000
    num_agent_steps_trained: 615000
    num_steps_sampled: 615000
    num_steps_trained: 615000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,615,16845.9,615000,-21.963,-18.7,-25.2,219.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 616000
  custom_metrics: {}
  date: 2021-10-29_01-47-31
  done: false
  episode_len_mean: 218.9
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.89000000000004
  episode_reward_min: -22.900000000000055
  episodes_this_iter: 5
  episodes_total: 2126
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.176749882250753e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.23812619042065408
          entropy_coeff: 0.009999999999999998
          kl: 0.0013262933462029262
          policy_loss: -0.025597581598493787
          total_loss: 1.6068456252415975
          vf_explained_var: 0.16515609622001648
          vf_loss: 1.6348244627316793
    num_agent_steps_sampled: 616000
    num_agent_steps_trained: 616000
    num_steps_sampled: 616000
    num_steps_trained: 616000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,616,16877.3,616000,-21.89,-18.7,-22.9,218.9




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 617000
  custom_metrics: {}
  date: 2021-10-29_01-48-18
  done: false
  episode_len_mean: 219.03
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.90300000000004
  episode_reward_min: -23.400000000000063
  episodes_this_iter: 4
  episodes_total: 2130
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0883749411253764e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.3395768506659402
          entropy_coeff: 0.009999999999999998
          kl: 0.0029057756873907435
          policy_loss: 0.02316751471824116
          total_loss: 1.2224732716878255
          vf_explained_var: 0.10952387005090714
          vf_loss: 1.2027015268802643
    num_agent_steps_sampled: 617000
    num_agent_steps_trained: 617000
    num_steps_sampled: 617000
    num_steps_trained: 617000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,617,16925,617000,-21.903,-18.7,-23.4,219.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 618000
  custom_metrics: {}
  date: 2021-10-29_01-48-51
  done: false
  episode_len_mean: 219.2
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.92000000000004
  episode_reward_min: -23.400000000000063
  episodes_this_iter: 5
  episodes_total: 2135
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0441874705626882e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.32188134259647794
          entropy_coeff: 0.009999999999999998
          kl: 0.0027100853829270005
          policy_loss: 0.014793142014079623
          total_loss: 1.4184748576747046
          vf_explained_var: 0.29552555084228516
          vf_loss: 1.4069005376762813
    num_agent_steps_sampled: 618000
    num_agent_steps_trained: 618000
    num_steps_sampled: 618000
    num_steps_trained: 618000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,618,16958.1,618000,-21.92,-18.7,-23.4,219.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 619000
  custom_metrics: {}
  date: 2021-10-29_01-49-23
  done: false
  episode_len_mean: 219.26
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -21.92600000000004
  episode_reward_min: -23.400000000000063
  episodes_this_iter: 4
  episodes_total: 2139
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.220937352813441e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.2705299511551857
          entropy_coeff: 0.009999999999999998
          kl: 0.0017690821347778908
          policy_loss: 0.03948552227682538
          total_loss: 0.895802691910002
          vf_explained_var: 0.44823628664016724
          vf_loss: 0.8590224679973391
    num_agent_steps_sampled: 619000
    num_agent_steps_trained: 619000
    num_steps_sampled: 619000
    num_steps_trained: 619000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,619,16989.5,619000,-21.926,-18.7,-23.4,219.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 620000
  custom_metrics: {}
  date: 2021-10-29_01-49-50
  done: false
  episode_len_mean: 220.68
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.068000000000033
  episode_reward_min: -31.200000000000173
  episodes_this_iter: 4
  episodes_total: 2143
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6104686764067205e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.7942152277463013
          entropy_coeff: 0.009999999999999998
          kl: 0.039211213310332164
          policy_loss: 0.001006464163462321
          total_loss: 0.8500923686557346
          vf_explained_var: 0.5283896327018738
          vf_loss: 0.8570280588335462
    num_agent_steps_sampled: 620000
    num_agent_steps_trained: 620000
    num_steps_sampled: 620000
    num_steps_trained: 620000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,620,17017.2,620000,-22.068,-18.7,-31.2,220.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 621000
  custom_metrics: {}
  date: 2021-10-29_01-50-14
  done: false
  episode_len_mean: 223.39
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.339000000000052
  episode_reward_min: -37.600000000000264
  episodes_this_iter: 3
  episodes_total: 2146
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9157030146100805e-20
          cur_lr: 5.000000000000001e-05
          entropy: 1.6419614712397257
          entropy_coeff: 0.009999999999999998
          kl: 0.0994848394326847
          policy_loss: -0.0019861367841561635
          total_loss: 0.6047362003061506
          vf_explained_var: 0.5726887583732605
          vf_loss: 0.6231419636143578
    num_agent_steps_sampled: 621000
    num_agent_steps_trained: 621000
    num_steps_sampled: 621000
    num_steps_trained: 621000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,621,17040.5,621000,-22.339,-18.7,-37.6,223.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 622000
  custom_metrics: {}
  date: 2021-10-29_01-50-37
  done: false
  episode_len_mean: 227.2
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -22.72000000000005
  episode_reward_min: -40.400000000000304
  episodes_this_iter: 3
  episodes_total: 2149
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.873554521915118e-20
          cur_lr: 5.000000000000001e-05
          entropy: 1.8940541956159804
          entropy_coeff: 0.009999999999999998
          kl: 0.029740436425729735
          policy_loss: 0.035171532299783495
          total_loss: 0.539488876859347
          vf_explained_var: 0.2639690935611725
          vf_loss: 0.5232578859561019
    num_agent_steps_sampled: 622000
    num_agent_steps_trained: 622000
    num_steps_sampled: 622000
    num_steps_trained: 622000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,622,17063.8,622000,-22.72,-18.7,-40.4,227.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 623000
  custom_metrics: {}
  date: 2021-10-29_01-51-00
  done: false
  episode_len_mean: 230.2
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.020000000000053
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2152
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.81033178287268e-20
          cur_lr: 5.000000000000001e-05
          entropy: 1.7150937649938796
          entropy_coeff: 0.009999999999999998
          kl: 0.018299014688112278
          policy_loss: -0.018562797374195524
          total_loss: 0.430657556383974
          vf_explained_var: 0.47834116220474243
          vf_loss: 0.4663712914619181
    num_agent_steps_sampled: 623000
    num_agent_steps_trained: 623000
    num_steps_sampled: 623000
    num_steps_trained: 623000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,623,17087,623000,-23.02,-18.7,-41.4,230.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 624000
  custom_metrics: {}
  date: 2021-10-29_01-51-22
  done: false
  episode_len_mean: 234.49
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.44900000000006
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2155
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.81033178287268e-20
          cur_lr: 5.000000000000001e-05
          entropy: 2.188549745082855
          entropy_coeff: 0.009999999999999998
          kl: 0.026335162361426256
          policy_loss: -0.0453729174617264
          total_loss: 0.36495129250817826
          vf_explained_var: 0.5784701108932495
          vf_loss: 0.43220970547861526
    num_agent_steps_sampled: 624000
    num_agent_steps_trained: 624000
    num_steps_sampled: 624000
    num_steps_trained: 624000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,624,17109.1,624000,-23.449,-18.7,-41.4,234.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 625000
  custom_metrics: {}
  date: 2021-10-29_01-51-48
  done: false
  episode_len_mean: 235.61
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.561000000000064
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2158
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3215497674309016e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.1214197849233944
          entropy_coeff: 0.009999999999999998
          kl: 0.013772586255914456
          policy_loss: -0.10192462934388055
          total_loss: 0.8526820441087087
          vf_explained_var: 0.2438010722398758
          vf_loss: 0.9658208780818516
    num_agent_steps_sampled: 625000
    num_agent_steps_trained: 625000
    num_steps_sampled: 625000
    num_steps_trained: 625000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,625,17134.4,625000,-23.561,-18.7,-41.4,235.61




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 626000
  custom_metrics: {}
  date: 2021-10-29_01-52-34
  done: false
  episode_len_mean: 237.39
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -23.73900000000006
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 4
  episodes_total: 2162
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3215497674309016e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.8030411097738478
          entropy_coeff: 0.009999999999999998
          kl: 0.024593094044849353
          policy_loss: -0.10393508623043696
          total_loss: 1.318826370106803
          vf_explained_var: 0.22657719254493713
          vf_loss: 1.4307918515470293
    num_agent_steps_sampled: 626000
    num_agent_steps_trained: 626000
    num_steps_sampled: 626000
    num_steps_trained: 626000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,626,17180.9,626000,-23.739,-18.7,-41.4,237.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 627000
  custom_metrics: {}
  date: 2021-10-29_01-52-56
  done: false
  episode_len_mean: 241.55
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -24.155000000000072
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2165
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9823246511463534e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.9880060354868572
          entropy_coeff: 0.009999999999999998
          kl: 0.04405690037826623
          policy_loss: 0.0029201571312215593
          total_loss: 0.6607835757235686
          vf_explained_var: 0.6590595841407776
          vf_loss: 0.6777434703376558
    num_agent_steps_sampled: 627000
    num_agent_steps_trained: 627000
    num_steps_sampled: 627000
    num_steps_trained: 627000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,627,17202.2,627000,-24.155,-18.7,-41.4,241.55


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 628000
  custom_metrics: {}
  date: 2021-10-29_01-53-17
  done: false
  episode_len_mean: 245.63
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -24.563000000000073
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2168
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.97348697671953e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.898346655898624
          entropy_coeff: 0.009999999999999998
          kl: 0.037062773579954404
          policy_loss: 0.1289300032787853
          total_loss: 0.6403591765297784
          vf_explained_var: 0.6934674382209778
          vf_loss: 0.5304126403397984
    num_agent_steps_sampled: 628000
    num_agent_steps_trained: 628000
    num_steps_sampled: 628000
    num_steps_trained: 628000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,628,17223.9,628000,-24.563,-18.7,-41.4,245.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 629000
  custom_metrics: {}
  date: 2021-10-29_01-53-38
  done: false
  episode_len_mean: 249.35
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -24.935000000000088
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2171
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.460230465079295e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.793368144830068
          entropy_coeff: 0.009999999999999998
          kl: 0.044856755474851934
          policy_loss: 0.07513519012265735
          total_loss: 0.907625847392612
          vf_explained_var: 0.34992045164108276
          vf_loss: 0.8504243289430936
    num_agent_steps_sampled: 629000
    num_agent_steps_trained: 629000
    num_steps_sampled: 629000
    num_steps_trained: 629000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,629,17244.8,629000,-24.935,-18.7,-41.4,249.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 630000
  custom_metrics: {}
  date: 2021-10-29_01-54-02
  done: false
  episode_len_mean: 252.63
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.263000000000083
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2174
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.690345697618941e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.9613420844078064
          entropy_coeff: 0.009999999999999998
          kl: 0.030135929948179808
          policy_loss: 0.04599780158864127
          total_loss: 0.6345568428436915
          vf_explained_var: 0.5692087411880493
          vf_loss: 0.6081724680960179
    num_agent_steps_sampled: 630000
    num_agent_steps_trained: 630000
    num_steps_sampled: 630000
    num_steps_trained: 630000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,630,17268.6,630000,-25.263,-18.7,-41.4,252.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 631000
  custom_metrics: {}
  date: 2021-10-29_01-54-27
  done: false
  episode_len_mean: 254.95
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.495000000000086
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2177
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0035518546428413e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.8136548346943326
          entropy_coeff: 0.009999999999999998
          kl: 0.01720633902531949
          policy_loss: -0.09870845969352457
          total_loss: 0.9734585636191898
          vf_explained_var: 0.2759650647640228
          vf_loss: 1.0903035627471076
    num_agent_steps_sampled: 631000
    num_agent_steps_trained: 631000
    num_steps_sampled: 631000
    num_steps_trained: 631000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,631,17293.2,631000,-25.495,-18.7,-41.4,254.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 632000
  custom_metrics: {}
  date: 2021-10-29_01-54-53
  done: false
  episode_len_mean: 257.51
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.7510000000001
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 4
  episodes_total: 2181
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0035518546428413e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.450631246301863
          entropy_coeff: 0.009999999999999998
          kl: 0.029734831107595443
          policy_loss: 0.018934966375430424
          total_loss: 1.413986505402459
          vf_explained_var: 0.016123276203870773
          vf_loss: 1.4095578577783372
    num_agent_steps_sampled: 632000
    num_agent_steps_trained: 632000
    num_steps_sampled: 632000
    num_steps_trained: 632000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,632,17319.3,632000,-25.751,-18.7,-41.4,257.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 633000
  custom_metrics: {}
  date: 2021-10-29_01-55-20
  done: false
  episode_len_mean: 259.7
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -25.970000000000095
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 4
  episodes_total: 2185
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.505327781964262e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.6364502045843337
          entropy_coeff: 0.009999999999999998
          kl: 0.02366460998928817
          policy_loss: 0.031011833747227987
          total_loss: 1.082658087544971
          vf_explained_var: 0.24148334562778473
          vf_loss: 1.0680107533931733
    num_agent_steps_sampled: 633000
    num_agent_steps_trained: 633000
    num_steps_sampled: 633000
    num_steps_trained: 633000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,633,17346.3,633000,-25.97,-18.7,-41.4,259.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 634000
  custom_metrics: {}
  date: 2021-10-29_01-55-47
  done: false
  episode_len_mean: 261.36
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -26.136000000000102
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2188
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.257991672946393e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.6393219616678025
          entropy_coeff: 0.009999999999999998
          kl: 0.03391402294179847
          policy_loss: -0.07085929181840685
          total_loss: 0.6401808861229155
          vf_explained_var: 0.5166884064674377
          vf_loss: 0.727433388431867
    num_agent_steps_sampled: 634000
    num_agent_steps_trained: 634000
    num_steps_sampled: 634000
    num_steps_trained: 634000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,634,17373,634000,-26.136,-18.7,-41.4,261.36




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 635000
  custom_metrics: {}
  date: 2021-10-29_01-56-30
  done: false
  episode_len_mean: 263.89
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -26.389000000000106
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 4
  episodes_total: 2192
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.386987509419589e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.838997721672058
          entropy_coeff: 0.009999999999999998
          kl: 0.02546165507660224
          policy_loss: 0.04954686611890793
          total_loss: 0.7550819238026937
          vf_explained_var: 0.7119632363319397
          vf_loss: 0.7239250322182973
    num_agent_steps_sampled: 635000
    num_agent_steps_trained: 635000
    num_steps_sampled: 635000
    num_steps_trained: 635000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,635,17415.9,635000,-26.389,-18.7,-41.4,263.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 636000
  custom_metrics: {}
  date: 2021-10-29_01-56-55
  done: false
  episode_len_mean: 265.99
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -26.59900000000011
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2195
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.080481264129384e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.7820901287926567
          entropy_coeff: 0.009999999999999998
          kl: 0.01041064375284356
          policy_loss: 0.019671464380290774
          total_loss: 0.42398185431957247
          vf_explained_var: 0.7731571793556213
          vf_loss: 0.4221312933497959
    num_agent_steps_sampled: 636000
    num_agent_steps_trained: 636000
    num_steps_sampled: 636000
    num_steps_trained: 636000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,636,17441.8,636000,-26.599,-18.7,-41.4,265.99


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 637000
  custom_metrics: {}
  date: 2021-10-29_01-57-21
  done: false
  episode_len_mean: 268.92
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -26.892000000000102
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 4
  episodes_total: 2199
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.080481264129384e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.4779663311110602
          entropy_coeff: 0.009999999999999998
          kl: 0.015227620394252976
          policy_loss: -0.057319050696161056
          total_loss: 0.7435479137632582
          vf_explained_var: 0.5587982535362244
          vf_loss: 0.8156466252273984
    num_agent_steps_sampled: 637000
    num_agent_steps_trained: 637000
    num_steps_sampled: 637000
    num_steps_trained: 637000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,637,17466.8,637000,-26.892,-18.7,-41.4,268.92


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 638000
  custom_metrics: {}
  date: 2021-10-29_01-57-48
  done: false
  episode_len_mean: 270.63
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -27.063000000000116
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 4
  episodes_total: 2203
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.080481264129384e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.8782362639904022
          entropy_coeff: 0.009999999999999998
          kl: 0.018768948709349646
          policy_loss: 0.036484122855795754
          total_loss: 1.0082703272501627
          vf_explained_var: 0.49840155243873596
          vf_loss: 0.9805685606267717
    num_agent_steps_sampled: 638000
    num_agent_steps_trained: 638000
    num_steps_sampled: 638000
    num_steps_trained: 638000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,638,17494.3,638000,-27.063,-18.7,-41.4,270.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 639000
  custom_metrics: {}
  date: 2021-10-29_01-58-14
  done: false
  episode_len_mean: 271.84
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -27.18400000000012
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2206
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.080481264129384e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.9975895590252346
          entropy_coeff: 0.009999999999999998
          kl: 0.011583167784325079
          policy_loss: -0.1507905148797565
          total_loss: 0.6623062471548716
          vf_explained_var: 0.6025246381759644
          vf_loss: 0.8230726546711392
    num_agent_steps_sampled: 639000
    num_agent_steps_trained: 639000
    num_steps_sampled: 639000
    num_steps_trained: 639000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,639,17520.2,639000,-27.184,-18.7,-41.4,271.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 640000
  custom_metrics: {}
  date: 2021-10-29_01-58-37
  done: false
  episode_len_mean: 274.98
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -27.49800000000012
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2209
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.080481264129384e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.656613987021976
          entropy_coeff: 0.009999999999999998
          kl: 0.05595261266773615
          policy_loss: -0.12798326106535063
          total_loss: 0.7699888289802604
          vf_explained_var: 0.7309889197349548
          vf_loss: 0.9145382238758935
    num_agent_steps_sampled: 640000
    num_agent_steps_trained: 640000
    num_steps_sampled: 640000
    num_steps_trained: 640000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,640,17543.7,640000,-27.498,-18.7,-41.4,274.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 641000
  custom_metrics: {}
  date: 2021-10-29_01-59-01
  done: false
  episode_len_mean: 278.43
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -27.84300000000012
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2212
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.620721896194075e-18
          cur_lr: 5.000000000000001e-05
          entropy: 2.0554986768298678
          entropy_coeff: 0.009999999999999998
          kl: 0.022153256991793453
          policy_loss: -0.14479879662394524
          total_loss: 0.40013674530718063
          vf_explained_var: 0.7332356572151184
          vf_loss: 0.5654905279477437
    num_agent_steps_sampled: 641000
    num_agent_steps_trained: 641000
    num_steps_sampled: 641000
    num_steps_trained: 641000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,641,17567.6,641000,-27.843,-18.7,-41.4,278.43


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 642000
  custom_metrics: {}
  date: 2021-10-29_01-59-27
  done: false
  episode_len_mean: 280.67
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -28.06700000000013
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 4
  episodes_total: 2216
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1431082844291115e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.240697384542889
          entropy_coeff: 0.009999999999999998
          kl: 0.010511487842905452
          policy_loss: 0.04676899131801393
          total_loss: 0.6385442094670402
          vf_explained_var: 0.7071040272712708
          vf_loss: 0.6041821893718508
    num_agent_steps_sampled: 642000
    num_agent_steps_trained: 642000
    num_steps_sampled: 642000
    num_steps_trained: 642000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,642,17593.4,642000,-28.067,-18.7,-41.4,280.67




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 643000
  custom_metrics: {}
  date: 2021-10-29_02-00-12
  done: false
  episode_len_mean: 282.32
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -28.232000000000134
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 4
  episodes_total: 2220
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1431082844291115e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.0021487163172829
          entropy_coeff: 0.009999999999999998
          kl: 0.014625315097896231
          policy_loss: 0.025655635115173128
          total_loss: 0.8262841426663928
          vf_explained_var: 0.4558759927749634
          vf_loss: 0.8106499837504493
    num_agent_steps_sampled: 643000
    num_agent_steps_trained: 643000
    num_steps_sampled: 643000
    num_steps_trained: 643000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,643,17638.1,643000,-28.232,-18.7,-41.4,282.32


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 644000
  custom_metrics: {}
  date: 2021-10-29_02-00-38
  done: false
  episode_len_mean: 285.01
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -28.501000000000133
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2223
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1431082844291115e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.9506573067771065
          entropy_coeff: 0.009999999999999998
          kl: 0.011383700206101821
          policy_loss: 0.007039439843760597
          total_loss: 0.8595789462327957
          vf_explained_var: 0.2865390479564667
          vf_loss: 0.8720460812664694
    num_agent_steps_sampled: 644000
    num_agent_steps_trained: 644000
    num_steps_sampled: 644000
    num_steps_trained: 644000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,644,17664.5,644000,-28.501,-18.7,-41.4,285.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 645000
  custom_metrics: {}
  date: 2021-10-29_02-01-02
  done: false
  episode_len_mean: 287.22
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -28.72200000000013
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2226
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1431082844291115e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.716198327806261
          entropy_coeff: 0.009999999999999998
          kl: 0.015275801651587331
          policy_loss: -0.12626992070840465
          total_loss: 0.774389159017139
          vf_explained_var: 0.47884950041770935
          vf_loss: 0.917821063597997
    num_agent_steps_sampled: 645000
    num_agent_steps_trained: 645000
    num_steps_sampled: 645000
    num_steps_trained: 645000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,645,17688.6,645000,-28.722,-18.7,-41.4,287.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 646000
  custom_metrics: {}
  date: 2021-10-29_02-01-28
  done: false
  episode_len_mean: 289.77
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -28.97700000000014
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 4
  episodes_total: 2230
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1431082844291115e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.450728220409817
          entropy_coeff: 0.009999999999999998
          kl: 0.011871582963451113
          policy_loss: 0.006033178998364343
          total_loss: 1.0461739281813303
          vf_explained_var: 0.30151477456092834
          vf_loss: 1.0546480218569438
    num_agent_steps_sampled: 646000
    num_agent_steps_trained: 646000
    num_steps_sampled: 646000
    num_steps_trained: 646000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,646,17714.6,646000,-28.977,-18.7,-41.4,289.77


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 647000
  custom_metrics: {}
  date: 2021-10-29_02-01-54
  done: false
  episode_len_mean: 292.31
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -29.23100000000015
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 4
  episodes_total: 2234
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1431082844291115e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.559245393011305
          entropy_coeff: 0.009999999999999998
          kl: 0.017833325660173215
          policy_loss: 0.029839859157800675
          total_loss: 1.079900727669398
          vf_explained_var: 0.2779131531715393
          vf_loss: 1.0656533062458038
    num_agent_steps_sampled: 647000
    num_agent_steps_trained: 647000
    num_steps_sampled: 647000
    num_steps_trained: 647000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,647,17740,647000,-29.231,-18.7,-41.4,292.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 648000
  custom_metrics: {}
  date: 2021-10-29_02-02-19
  done: false
  episode_len_mean: 293.98
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -29.39800000000015
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2237
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1431082844291115e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.4633819619814554
          entropy_coeff: 0.009999999999999998
          kl: 0.016193091093470162
          policy_loss: 0.059806062777837114
          total_loss: 0.7718253986703025
          vf_explained_var: 0.4082461893558502
          vf_loss: 0.7266531604031722
    num_agent_steps_sampled: 648000
    num_agent_steps_trained: 648000
    num_steps_sampled: 648000
    num_steps_trained: 648000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,648,17764.9,648000,-29.398,-18.7,-41.4,293.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 649000
  custom_metrics: {}
  date: 2021-10-29_02-02-44
  done: false
  episode_len_mean: 296.83
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -29.683000000000153
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 4
  episodes_total: 2241
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1431082844291115e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.5482325209511651
          entropy_coeff: 0.009999999999999998
          kl: 0.03703034371904752
          policy_loss: 0.019197600003745822
          total_loss: 1.3470432811313204
          vf_explained_var: 0.3048289716243744
          vf_loss: 1.3433279984527164
    num_agent_steps_sampled: 649000
    num_agent_steps_trained: 649000
    num_steps_sampled: 649000
    num_steps_trained: 649000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,649,17789.9,649000,-29.683,-18.7,-41.4,296.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 650000
  custom_metrics: {}
  date: 2021-10-29_02-03-11
  done: false
  episode_len_mean: 295.86
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -29.586000000000148
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 3
  episodes_total: 2244
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7146624266436672e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.3808779895305634
          entropy_coeff: 0.009999999999999998
          kl: 0.014585641089210653
          policy_loss: 0.011378485792213016
          total_loss: 0.9923943123883672
          vf_explained_var: 0.36230114102363586
          vf_loss: 0.9948246030343904
    num_agent_steps_sampled: 650000
    num_agent_steps_trained: 650000
    num_steps_sampled: 650000
    num_steps_trained: 650000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,650,17816.8,650000,-29.586,-18.7,-41.4,295.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 651000
  custom_metrics: {}
  date: 2021-10-29_02-03-39
  done: false
  episode_len_mean: 293.08
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -29.30800000000014
  episode_reward_min: -41.40000000000032
  episodes_this_iter: 4
  episodes_total: 2248
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7146624266436672e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.971860722038481
          entropy_coeff: 0.009999999999999998
          kl: 0.019122495620362497
          policy_loss: 0.016464008597864045
          total_loss: 1.1285345024532742
          vf_explained_var: 0.46497684717178345
          vf_loss: 1.121789116329617
    num_agent_steps_sampled: 651000
    num_agent_steps_trained: 651000
    num_steps_sampled: 651000
    num_steps_trained: 651000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,651,17845.1,651000,-29.308,-18.7,-41.4,293.08




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 652000
  custom_metrics: {}
  date: 2021-10-29_02-04-27
  done: false
  episode_len_mean: 289.97
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -28.99700000000014
  episode_reward_min: -39.9000000000003
  episodes_this_iter: 4
  episodes_total: 2252
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7146624266436672e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.1924254801538257
          entropy_coeff: 0.009999999999999998
          kl: 0.031886859374828366
          policy_loss: -0.05031005827089151
          total_loss: 1.1578568438688914
          vf_explained_var: 0.3440766930580139
          vf_loss: 1.2200911521911622
    num_agent_steps_sampled: 652000
    num_agent_steps_trained: 652000
    num_steps_sampled: 652000
    num_steps_trained: 652000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,652,17893.2,652000,-28.997,-18.7,-39.9,289.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 653000
  custom_metrics: {}
  date: 2021-10-29_02-04-54
  done: false
  episode_len_mean: 287.04
  episode_media: {}
  episode_reward_max: -18.699999999999996
  episode_reward_mean: -28.704000000000136
  episode_reward_min: -39.9000000000003
  episodes_this_iter: 4
  episodes_total: 2256
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5719936399655007e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.2051893168025547
          entropy_coeff: 0.009999999999999998
          kl: 0.02595794906831167
          policy_loss: -0.0035375874903466964
          total_loss: 0.5597758673959308
          vf_explained_var: 0.749941349029541
          vf_loss: 0.5753653410408232
    num_agent_steps_sampled: 653000
    num_agent_steps_trained: 653000
    num_steps_sampled: 653000
    num_steps_trained: 653000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,653,17920.4,653000,-28.704,-18.7,-39.9,287.04


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 654000
  custom_metrics: {}
  date: 2021-10-29_02-05-24
  done: false
  episode_len_mean: 286.14
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.614000000000136
  episode_reward_min: -39.9000000000003
  episodes_this_iter: 4
  episodes_total: 2260
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.85799045994825e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.1314387374454073
          entropy_coeff: 0.009999999999999998
          kl: 0.057138012567525466
          policy_loss: 0.029482988682058123
          total_loss: 0.6043197711308798
          vf_explained_var: 0.5718503594398499
          vf_loss: 0.5861511699027485
    num_agent_steps_sampled: 654000
    num_agent_steps_trained: 654000
    num_steps_sampled: 654000
    num_steps_trained: 654000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,654,17949.8,654000,-28.614,-20.9,-39.9,286.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 655000
  custom_metrics: {}
  date: 2021-10-29_02-05-53
  done: false
  episode_len_mean: 284.07
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.407000000000135
  episode_reward_min: -39.9000000000003
  episodes_this_iter: 4
  episodes_total: 2264
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.786985689922377e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.1314832223786249
          entropy_coeff: 0.009999999999999998
          kl: 0.023314659684796544
          policy_loss: -0.03427577291925748
          total_loss: 0.9013827827241686
          vf_explained_var: 0.4740297198295593
          vf_loss: 0.9469733867380354
    num_agent_steps_sampled: 655000
    num_agent_steps_trained: 655000
    num_steps_sampled: 655000
    num_steps_trained: 655000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,655,17978.9,655000,-28.407,-20.9,-39.9,284.07


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 656000
  custom_metrics: {}
  date: 2021-10-29_02-06-18
  done: false
  episode_len_mean: 281.04
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.104000000000134
  episode_reward_min: -36.40000000000025
  episodes_this_iter: 4
  episodes_total: 2268
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.680478534883566e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.3785606940587363
          entropy_coeff: 0.009999999999999998
          kl: 0.020598225962818163
          policy_loss: -0.029702663504415087
          total_loss: 0.41399020122157204
          vf_explained_var: 0.8169419169425964
          vf_loss: 0.4574784689479404
    num_agent_steps_sampled: 656000
    num_agent_steps_trained: 656000
    num_steps_sampled: 656000
    num_steps_trained: 656000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,656,18004.1,656000,-28.104,-20.9,-36.4,281.04


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 657000
  custom_metrics: {}
  date: 2021-10-29_02-06-44
  done: false
  episode_len_mean: 279.24
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -27.924000000000127
  episode_reward_min: -35.00000000000023
  episodes_this_iter: 3
  episodes_total: 2271
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3020717802325352e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.1168272051546309
          entropy_coeff: 0.009999999999999998
          kl: 0.029328279583059412
          policy_loss: -0.02849156405362818
          total_loss: 1.0551747785674201
          vf_explained_var: 0.32579827308654785
          vf_loss: 1.0948346038659413
    num_agent_steps_sampled: 657000
    num_agent_steps_trained: 657000
    num_steps_sampled: 657000
    num_steps_trained: 657000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,657,18030.3,657000,-27.924,-20.9,-35,279.24


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 658000
  custom_metrics: {}
  date: 2021-10-29_02-07-11
  done: false
  episode_len_mean: 277.15
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -27.715000000000117
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 4
  episodes_total: 2275
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.953107670348802e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.017215785715315
          entropy_coeff: 0.009999999999999998
          kl: 0.013723300982633052
          policy_loss: -0.009979835318194494
          total_loss: 0.9780856139130063
          vf_explained_var: 0.19010986387729645
          vf_loss: 0.9982376052273645
    num_agent_steps_sampled: 658000
    num_agent_steps_trained: 658000
    num_steps_sampled: 658000
    num_steps_trained: 658000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,658,18056.7,658000,-27.715,-20.9,-34.9,277.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 659000
  custom_metrics: {}
  date: 2021-10-29_02-07-39
  done: false
  episode_len_mean: 275.6
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -27.560000000000123
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 4
  episodes_total: 2279
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.953107670348802e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.0677732123268975
          entropy_coeff: 0.009999999999999998
          kl: 0.03374260354847599
          policy_loss: -0.02204661981927024
          total_loss: 0.7292580523424679
          vf_explained_var: 0.5162371397018433
          vf_loss: 0.761982399225235
    num_agent_steps_sampled: 659000
    num_agent_steps_trained: 659000
    num_steps_sampled: 659000
    num_steps_trained: 659000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,659,18084.6,659000,-27.56,-20.9,-34.9,275.6




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 660000
  custom_metrics: {}
  date: 2021-10-29_02-08-25
  done: false
  episode_len_mean: 274.82
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -27.48200000000012
  episode_reward_min: -34.900000000000226
  episodes_this_iter: 4
  episodes_total: 2283
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9296615055232033e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.173702645301819
          entropy_coeff: 0.009999999999999998
          kl: 0.04527421258687553
          policy_loss: 0.001326328847143385
          total_loss: 0.8132837229304843
          vf_explained_var: 0.469831645488739
          vf_loss: 0.823694415224923
    num_agent_steps_sampled: 660000
    num_agent_steps_trained: 660000
    num_steps_sampled: 660000
    num_steps_trained: 660000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,660,18130.6,660000,-27.482,-20.9,-34.9,274.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 661000
  custom_metrics: {}
  date: 2021-10-29_02-08-50
  done: false
  episode_len_mean: 275.91
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -27.591000000000122
  episode_reward_min: -37.80000000000027
  episodes_this_iter: 3
  episodes_total: 2286
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3944922582848047e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.3033194171057807
          entropy_coeff: 0.009999999999999998
          kl: 0.045342589563690756
          policy_loss: -0.030779650145106845
          total_loss: 0.6557597464985317
          vf_explained_var: 0.42566829919815063
          vf_loss: 0.6995725966989994
    num_agent_steps_sampled: 661000
    num_agent_steps_trained: 661000
    num_steps_sampled: 661000
    num_steps_trained: 661000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,661,18156.2,661000,-27.591,-20.9,-37.8,275.91


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 662000
  custom_metrics: {}
  date: 2021-10-29_02-09-11
  done: false
  episode_len_mean: 278.22
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -27.82200000000013
  episode_reward_min: -40.500000000000306
  episodes_this_iter: 3
  episodes_total: 2289
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.591738387427208e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.1851350280973647
          entropy_coeff: 0.009999999999999998
          kl: 0.028375162198638094
          policy_loss: 0.07815889004204009
          total_loss: 0.8957345296939214
          vf_explained_var: 0.27996864914894104
          vf_loss: 0.829426997154951
    num_agent_steps_sampled: 662000
    num_agent_steps_trained: 662000
    num_steps_sampled: 662000
    num_steps_trained: 662000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,662,18176.7,662000,-27.822,-20.9,-40.5,278.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 663000
  custom_metrics: {}
  date: 2021-10-29_02-09-31
  done: false
  episode_len_mean: 280.74
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.074000000000133
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2292
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.887607581140813e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.2610811207029555
          entropy_coeff: 0.009999999999999998
          kl: 0.01326824428832146
          policy_loss: 0.07475817211800151
          total_loss: 0.9798566470543544
          vf_explained_var: -0.1604650616645813
          vf_loss: 0.917709293961525
    num_agent_steps_sampled: 663000
    num_agent_steps_trained: 663000
    num_steps_sampled: 663000
    num_steps_trained: 663000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,663,18196.5,663000,-28.074,-20.9,-40.7,280.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 664000
  custom_metrics: {}
  date: 2021-10-29_02-09-53
  done: false
  episode_len_mean: 282.05
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.205000000000133
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2295
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.887607581140813e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.488233670923445
          entropy_coeff: 0.009999999999999998
          kl: 0.013414405848280353
          policy_loss: 0.028621862166457707
          total_loss: 1.0257960928810967
          vf_explained_var: 0.17956417798995972
          vf_loss: 1.0120565778679318
    num_agent_steps_sampled: 664000
    num_agent_steps_trained: 664000
    num_steps_sampled: 664000
    num_steps_trained: 664000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,664,18219,664000,-28.205,-20.9,-40.7,282.05


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 665000
  custom_metrics: {}
  date: 2021-10-29_02-10-17
  done: false
  episode_len_mean: 282.66
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.26600000000013
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2298
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.887607581140813e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.1320061829355028
          entropy_coeff: 0.009999999999999998
          kl: 0.007588456395633495
          policy_loss: 0.04414192106988695
          total_loss: 1.12577566769388
          vf_explained_var: 0.047911204397678375
          vf_loss: 1.0929537976781527
    num_agent_steps_sampled: 665000
    num_agent_steps_trained: 665000
    num_steps_sampled: 665000
    num_steps_trained: 665000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,665,18242.7,665000,-28.266,-20.9,-40.7,282.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 666000
  custom_metrics: {}
  date: 2021-10-29_02-10-41
  done: false
  episode_len_mean: 284.38
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.438000000000134
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2301
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.887607581140813e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.186290247572793
          entropy_coeff: 0.009999999999999998
          kl: 0.02386688766815305
          policy_loss: 0.0537493910226557
          total_loss: 1.356489285826683
          vf_explained_var: 0.17460381984710693
          vf_loss: 1.3146027824117077
    num_agent_steps_sampled: 666000
    num_agent_steps_trained: 666000
    num_steps_sampled: 666000
    num_steps_trained: 666000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,666,18266.4,666000,-28.438,-20.9,-40.7,284.38


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 667000
  custom_metrics: {}
  date: 2021-10-29_02-11-04
  done: false
  episode_len_mean: 286.18
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.618000000000137
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2304
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.483141137171122e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.2341636757055918
          entropy_coeff: 0.009999999999999998
          kl: 0.01149566775050551
          policy_loss: 0.10311746067470974
          total_loss: 1.0012771318356195
          vf_explained_var: -0.1180247813463211
          vf_loss: 0.9105013037307395
    num_agent_steps_sampled: 667000
    num_agent_steps_trained: 667000
    num_steps_sampled: 667000
    num_steps_trained: 667000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,667,18289.4,667000,-28.618,-20.9,-40.7,286.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 668000
  custom_metrics: {}
  date: 2021-10-29_02-11-27
  done: false
  episode_len_mean: 286.74
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.67400000000014
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2307
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.483141137171122e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.0894309295548332
          entropy_coeff: 0.009999999999999998
          kl: 0.02605017260057044
          policy_loss: -0.14441701256566578
          total_loss: 1.7975362327363755
          vf_explained_var: 0.035450275987386703
          vf_loss: 1.9528475536240473
    num_agent_steps_sampled: 668000
    num_agent_steps_trained: 668000
    num_steps_sampled: 668000
    num_steps_trained: 668000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,668,18313,668000,-28.674,-20.9,-40.7,286.74




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 669000
  custom_metrics: {}
  date: 2021-10-29_02-12-11
  done: false
  episode_len_mean: 284.8
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.480000000000132
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 2311
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2247117057566824e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.1805555052227443
          entropy_coeff: 0.009999999999999998
          kl: 0.011850485929230735
          policy_loss: 0.0030908854885233774
          total_loss: 1.5042673250039418
          vf_explained_var: 0.24782387912273407
          vf_loss: 1.5129820108413696
    num_agent_steps_sampled: 669000
    num_agent_steps_trained: 669000
    num_steps_sampled: 669000
    num_steps_trained: 669000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,669,18356.3,669000,-28.48,-20.9,-40.7,284.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 670000
  custom_metrics: {}
  date: 2021-10-29_02-12-36
  done: false
  episode_len_mean: 284.65
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.465000000000128
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2314
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2247117057566824e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.2903940896193187
          entropy_coeff: 0.009999999999999998
          kl: 0.020054074007514248
          policy_loss: -0.031508132401439876
          total_loss: 1.3256567749712203
          vf_explained_var: 0.06018318980932236
          vf_loss: 1.370068872637219
    num_agent_steps_sampled: 670000
    num_agent_steps_trained: 670000
    num_steps_sampled: 670000
    num_steps_trained: 670000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,670,18381.6,670000,-28.465,-20.9,-40.7,284.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 671000
  custom_metrics: {}
  date: 2021-10-29_02-13-00
  done: false
  episode_len_mean: 285.38
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.538000000000135
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2317
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.337067558635024e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.334647630320655
          entropy_coeff: 0.009999999999999998
          kl: 0.01759283565777127
          policy_loss: -0.13170957846773995
          total_loss: 1.4537135826216803
          vf_explained_var: 0.16750280559062958
          vf_loss: 1.5987696342998081
    num_agent_steps_sampled: 671000
    num_agent_steps_trained: 671000
    num_steps_sampled: 671000
    num_steps_trained: 671000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,671,18405.7,671000,-28.538,-20.9,-40.7,285.38


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 672000
  custom_metrics: {}
  date: 2021-10-29_02-13-23
  done: false
  episode_len_mean: 287.71
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.77100000000014
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 2321
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.337067558635024e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.3753385583559672
          entropy_coeff: 0.009999999999999998
          kl: 0.03022615478659642
          policy_loss: 0.03290383128656282
          total_loss: 1.2652525590525734
          vf_explained_var: -0.0003419948916416615
          vf_loss: 1.2461021188232633
    num_agent_steps_sampled: 672000
    num_agent_steps_trained: 672000
    num_steps_sampled: 672000
    num_steps_trained: 672000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,672,18428.8,672000,-28.771,-20.9,-40.7,287.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 673000
  custom_metrics: {}
  date: 2021-10-29_02-13-49
  done: false
  episode_len_mean: 287.97
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.79700000000014
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2324
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.0056013379525365e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.1786264929506514
          entropy_coeff: 0.009999999999999998
          kl: 0.005054476118337783
          policy_loss: 0.04931549032529195
          total_loss: 1.0399378220240274
          vf_explained_var: 0.10252968221902847
          vf_loss: 1.002408592734072
    num_agent_steps_sampled: 673000
    num_agent_steps_trained: 673000
    num_steps_sampled: 673000
    num_steps_trained: 673000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,673,18454.5,673000,-28.797,-20.9,-40.7,287.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 674000
  custom_metrics: {}
  date: 2021-10-29_02-14-13
  done: false
  episode_len_mean: 287.33
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.73300000000014
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2327
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.0056013379525365e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.2500527461369833
          entropy_coeff: 0.009999999999999998
          kl: 0.014305621052081503
          policy_loss: -0.07734690805276236
          total_loss: 1.5705468032095167
          vf_explained_var: 0.1243884414434433
          vf_loss: 1.6603942301538255
    num_agent_steps_sampled: 674000
    num_agent_steps_trained: 674000
    num_steps_sampled: 674000
    num_steps_trained: 674000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,674,18478.5,674000,-28.733,-20.9,-40.7,287.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 675000
  custom_metrics: {}
  date: 2021-10-29_02-14-42
  done: false
  episode_len_mean: 286.93
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.69300000000014
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 2331
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.0056013379525365e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.5307257231738832
          entropy_coeff: 0.009999999999999998
          kl: 0.027395607534568475
          policy_loss: 0.019929756886429256
          total_loss: 1.233932555384106
          vf_explained_var: 0.15398898720741272
          vf_loss: 1.21931005915006
    num_agent_steps_sampled: 675000
    num_agent_steps_trained: 675000
    num_steps_sampled: 675000
    num_steps_trained: 675000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,675,18507.1,675000,-28.693,-20.9,-40.7,286.93


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 676000
  custom_metrics: {}
  date: 2021-10-29_02-15-10
  done: false
  episode_len_mean: 286.18
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.618000000000134
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 2335
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.508402006928805e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.8761498941315545
          entropy_coeff: 0.009999999999999998
          kl: 0.028877668804644527
          policy_loss: 0.03310853040052785
          total_loss: 1.1642069379488627
          vf_explained_var: 0.394464373588562
          vf_loss: 1.1398599002096388
    num_agent_steps_sampled: 676000
    num_agent_steps_trained: 676000
    num_steps_sampled: 676000
    num_steps_trained: 676000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,676,18535.3,676000,-28.618,-20.9,-40.7,286.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 677000
  custom_metrics: {}
  date: 2021-10-29_02-15-36
  done: false
  episode_len_mean: 285.16
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -28.516000000000137
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 2339
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1262603010393207e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.1933128396670023
          entropy_coeff: 0.009999999999999998
          kl: 0.023394299203160704
          policy_loss: 0.01669609724647469
          total_loss: 1.2844118416309356
          vf_explained_var: 0.3258626163005829
          vf_loss: 1.2796488569842444
    num_agent_steps_sampled: 677000
    num_agent_steps_trained: 677000
    num_steps_sampled: 677000
    num_steps_trained: 677000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,677,18561.9,677000,-28.516,-20.9,-40.7,285.16




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 678000
  custom_metrics: {}
  date: 2021-10-29_02-16-21
  done: false
  episode_len_mean: 284.93
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -28.493000000000137
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 2343
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6893904515589812e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.294819082154168
          entropy_coeff: 0.009999999999999998
          kl: 0.0255618968729896
          policy_loss: -0.00860670730471611
          total_loss: 1.1574304421742758
          vf_explained_var: 0.35541674494743347
          vf_loss: 1.1789853407277002
    num_agent_steps_sampled: 678000
    num_agent_steps_trained: 678000
    num_steps_sampled: 678000
    num_steps_trained: 678000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,678,18606.4,678000,-28.493,-19.3,-40.7,284.93


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 679000
  custom_metrics: {}
  date: 2021-10-29_02-16-46
  done: false
  episode_len_mean: 285.1
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -28.510000000000137
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2346
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5340856773384713e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2961641669273376
          entropy_coeff: 0.009999999999999998
          kl: 0.022828381567876397
          policy_loss: 0.05818194448947907
          total_loss: 1.0610835101869371
          vf_explained_var: -0.13268736004829407
          vf_loss: 1.0158632086382973
    num_agent_steps_sampled: 679000
    num_agent_steps_trained: 679000
    num_steps_sampled: 679000
    num_steps_trained: 679000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,679,18631.8,679000,-28.51,-19.3,-40.7,285.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 680000
  custom_metrics: {}
  date: 2021-10-29_02-17-09
  done: false
  episode_len_mean: 287.72
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -28.772000000000133
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2349
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.801128516007708e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.3887098577287462
          entropy_coeff: 0.009999999999999998
          kl: 0.02110807152086795
          policy_loss: 0.045268689592679344
          total_loss: 1.0345559196339713
          vf_explained_var: 0.3670133948326111
          vf_loss: 1.0031743216845723
    num_agent_steps_sampled: 680000
    num_agent_steps_trained: 680000
    num_steps_sampled: 680000
    num_steps_trained: 680000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,680,18654.7,680000,-28.772,-19.3,-40.7,287.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 681000
  custom_metrics: {}
  date: 2021-10-29_02-17-35
  done: false
  episode_len_mean: 290.06
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -29.00600000000014
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2352
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.701692774011558e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.5683566133181255
          entropy_coeff: 0.009999999999999998
          kl: 0.01631734756990949
          policy_loss: 0.00022744557095898523
          total_loss: 0.8829016546408336
          vf_explained_var: 0.35251614451408386
          vf_loss: 0.8983577741516962
    num_agent_steps_sampled: 681000
    num_agent_steps_trained: 681000
    num_steps_sampled: 681000
    num_steps_trained: 681000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,681,18680,681000,-29.006,-19.3,-40.7,290.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 682000
  custom_metrics: {}
  date: 2021-10-29_02-18-02
  done: false
  episode_len_mean: 289.54
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -28.954000000000136
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 2356
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.701692774011558e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.7995356483591928
          entropy_coeff: 0.009999999999999998
          kl: 0.027468845782885786
          policy_loss: -0.09590157998932733
          total_loss: 1.456736555364397
          vf_explained_var: -0.1636381298303604
          vf_loss: 1.56063347881039
    num_agent_steps_sampled: 682000
    num_agent_steps_trained: 682000
    num_steps_sampled: 682000
    num_steps_trained: 682000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,682,18707.4,682000,-28.954,-19.3,-40.7,289.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 683000
  custom_metrics: {}
  date: 2021-10-29_02-18-27
  done: false
  episode_len_mean: 291.26
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -29.126000000000143
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 2360
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.552539161017343e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2958999580807156
          entropy_coeff: 0.009999999999999998
          kl: 0.014623496779823084
          policy_loss: 0.04110879293746418
          total_loss: 1.3913037326600817
          vf_explained_var: 0.2562849819660187
          vf_loss: 1.3631539503733316
    num_agent_steps_sampled: 683000
    num_agent_steps_trained: 683000
    num_steps_sampled: 683000
    num_steps_trained: 683000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,683,18732.4,683000,-29.126,-19.3,-40.7,291.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 684000
  custom_metrics: {}
  date: 2021-10-29_02-18-56
  done: false
  episode_len_mean: 291.32
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -29.132000000000147
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 2364
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.552539161017343e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.6328680778543154
          entropy_coeff: 0.009999999999999998
          kl: 0.005972219656432287
          policy_loss: 0.05488339621159766
          total_loss: 1.2692500286632113
          vf_explained_var: 0.11794290691614151
          vf_loss: 1.220695306195153
    num_agent_steps_sampled: 684000
    num_agent_steps_trained: 684000
    num_steps_sampled: 684000
    num_steps_trained: 684000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,684,18761.2,684000,-29.132,-19.3,-40.7,291.32


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 685000
  custom_metrics: {}
  date: 2021-10-29_02-19-25
  done: false
  episode_len_mean: 289.58
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -28.958000000000144
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 2368
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.552539161017343e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.6237150520914131
          entropy_coeff: 0.009999999999999998
          kl: 0.006952683228375673
          policy_loss: 0.021806558966636656
          total_loss: 1.165094921323988
          vf_explained_var: 0.13198545575141907
          vf_loss: 1.1495255132516224
    num_agent_steps_sampled: 685000
    num_agent_steps_trained: 685000
    num_steps_sampled: 685000
    num_steps_trained: 685000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,685,18790.7,685000,-28.958,-19.3,-40.7,289.58




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 686000
  custom_metrics: {}
  date: 2021-10-29_02-20-07
  done: false
  episode_len_mean: 290.33
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -29.033000000000133
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2371
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.552539161017343e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.3424919605255128
          entropy_coeff: 0.009999999999999998
          kl: 0.009477344918169687
          policy_loss: -0.020831378797690075
          total_loss: 0.6678667763868967
          vf_explained_var: 0.41263166069984436
          vf_loss: 0.702123073902395
    num_agent_steps_sampled: 686000
    num_agent_steps_trained: 686000
    num_steps_sampled: 686000
    num_steps_trained: 686000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,686,18831.8,686000,-29.033,-19.3,-40.7,290.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 687000
  custom_metrics: {}
  date: 2021-10-29_02-20-32
  done: false
  episode_len_mean: 291.22
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -29.12200000000015
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 2375
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.552539161017343e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2807614929146236
          entropy_coeff: 0.009999999999999998
          kl: 0.021848269506729422
          policy_loss: 0.04305527276462979
          total_loss: 1.1543235533767275
          vf_explained_var: 0.2695474922657013
          vf_loss: 1.1240758766730627
    num_agent_steps_sampled: 687000
    num_agent_steps_trained: 687000
    num_steps_sampled: 687000
    num_steps_trained: 687000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,687,18856.8,687000,-29.122,-19.3,-40.7,291.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 688000
  custom_metrics: {}
  date: 2021-10-29_02-20-58
  done: false
  episode_len_mean: 291.64
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -29.16400000000015
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 2378
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2828808741526014e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.2799830363856421
          entropy_coeff: 0.009999999999999998
          kl: 0.08359202803705197
          policy_loss: 0.02409598645236757
          total_loss: 1.1806970808241102
          vf_explained_var: 0.3379061818122864
          vf_loss: 1.169400924609767
    num_agent_steps_sampled: 688000
    num_agent_steps_trained: 688000
    num_steps_sampled: 688000
    num_steps_trained: 688000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,688,18883,688000,-29.164,-19.3,-40.7,291.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 689000
  custom_metrics: {}
  date: 2021-10-29_02-21-27
  done: false
  episode_len_mean: 291.69
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -29.169000000000146
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 2382
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.924321311228901e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.9537969085905287
          entropy_coeff: 0.009999999999999998
          kl: 0.013532477434375799
          policy_loss: -0.014712476813130908
          total_loss: 1.3626538117726643
          vf_explained_var: 0.23682066798210144
          vf_loss: 1.3869042595227559
    num_agent_steps_sampled: 689000
    num_agent_steps_trained: 689000
    num_steps_sampled: 689000
    num_steps_trained: 689000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,689,18911.9,689000,-29.169,-19.3,-40.7,291.69


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 690000
  custom_metrics: {}
  date: 2021-10-29_02-21-57
  done: false
  episode_len_mean: 288.93
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -28.893000000000143
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 5
  episodes_total: 2387
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.924321311228901e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.491836311088668
          entropy_coeff: 0.009999999999999998
          kl: 0.024339007414966654
          policy_loss: -0.011029740754101012
          total_loss: 1.4457994209395515
          vf_explained_var: 0.3277561664581299
          vf_loss: 1.4617475244734022
    num_agent_steps_sampled: 690000
    num_agent_steps_trained: 690000
    num_steps_sampled: 690000
    num_steps_trained: 690000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,690,18942.6,690000,-28.893,-19.3,-40.7,288.93


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 691000
  custom_metrics: {}
  date: 2021-10-29_02-22-28
  done: false
  episode_len_mean: 283.29
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -28.32900000000013
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 2391
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8864819668433523e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.7437039448155297
          entropy_coeff: 0.009999999999999998
          kl: 0.004942834529356727
          policy_loss: 0.06805358760886722
          total_loss: 1.206238172451655
          vf_explained_var: 0.46764203906059265
          vf_loss: 1.145621614323722
    num_agent_steps_sampled: 691000
    num_agent_steps_trained: 691000
    num_steps_sampled: 691000
    num_steps_trained: 691000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,691,18972.7,691000,-28.329,-19.3,-39,283.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 692000
  custom_metrics: {}
  date: 2021-10-29_02-22-59
  done: false
  episode_len_mean: 279.03
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -27.903000000000123
  episode_reward_min: -35.20000000000023
  episodes_this_iter: 4
  episodes_total: 2395
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4432409834216761e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.7515349355836709
          entropy_coeff: 0.009999999999999998
          kl: 0.012621066968952575
          policy_loss: 0.014932917803525925
          total_loss: 0.9960480795966254
          vf_explained_var: 0.4328818917274475
          vf_loss: 0.9886305199729072
    num_agent_steps_sampled: 692000
    num_agent_steps_trained: 692000
    num_steps_sampled: 692000
    num_steps_trained: 692000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,692,19003.7,692000,-27.903,-19.3,-35.2,279.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 693000
  custom_metrics: {}
  date: 2021-10-29_02-23-28
  done: false
  episode_len_mean: 275.86
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -27.586000000000116
  episode_reward_min: -35.20000000000023
  episodes_this_iter: 4
  episodes_total: 2399
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4432409834216761e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.5065069396462706
          entropy_coeff: 0.009999999999999998
          kl: 0.004833717570470242
          policy_loss: -0.02250714103380839
          total_loss: 1.07355314956771
          vf_explained_var: 0.3164706230163574
          vf_loss: 1.101125356886122
    num_agent_steps_sampled: 693000
    num_agent_steps_trained: 693000
    num_steps_sampled: 693000
    num_steps_trained: 693000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,693,19033.4,693000,-27.586,-19.3,-35.2,275.86




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 694000
  custom_metrics: {}
  date: 2021-10-29_02-24-15
  done: false
  episode_len_mean: 272.31
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -27.23100000000012
  episode_reward_min: -34.200000000000216
  episodes_this_iter: 4
  episodes_total: 2403
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.216204917108381e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.8705683256189028
          entropy_coeff: 0.009999999999999998
          kl: 0.02312558302188212
          policy_loss: -0.0052965119481086734
          total_loss: 0.997299176454544
          vf_explained_var: 0.5025886297225952
          vf_loss: 1.0113013757599725
    num_agent_steps_sampled: 694000
    num_agent_steps_trained: 694000
    num_steps_sampled: 694000
    num_steps_trained: 694000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,694,19080.2,694000,-27.231,-19.3,-34.2,272.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 695000
  custom_metrics: {}
  date: 2021-10-29_02-24-39
  done: false
  episode_len_mean: 272.15
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -27.215000000000117
  episode_reward_min: -34.200000000000216
  episodes_this_iter: 4
  episodes_total: 2407
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.082430737566257e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.697325395213233
          entropy_coeff: 0.009999999999999998
          kl: 0.015026186885421329
          policy_loss: 0.022323318901989193
          total_loss: 1.057123374276691
          vf_explained_var: 0.41826188564300537
          vf_loss: 1.0517733136812846
    num_agent_steps_sampled: 695000
    num_agent_steps_trained: 695000
    num_steps_sampled: 695000
    num_steps_trained: 695000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,695,19103.8,695000,-27.215,-19.3,-34.2,272.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 696000
  custom_metrics: {}
  date: 2021-10-29_02-25-06
  done: false
  episode_len_mean: 271.68
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -27.16800000000011
  episode_reward_min: -34.200000000000216
  episodes_this_iter: 3
  episodes_total: 2410
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.082430737566257e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.1699026809798347
          entropy_coeff: 0.009999999999999998
          kl: 0.018564084099325563
          policy_loss: -0.10471200197935104
          total_loss: 1.2200969636440278
          vf_explained_var: 0.3988148272037506
          vf_loss: 1.3365079736544025
    num_agent_steps_sampled: 696000
    num_agent_steps_trained: 696000
    num_steps_sampled: 696000
    num_steps_trained: 696000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,696,19131.1,696000,-27.168,-19.3,-34.2,271.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 697000
  custom_metrics: {}
  date: 2021-10-29_02-25-32
  done: false
  episode_len_mean: 270.76
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -27.076000000000118
  episode_reward_min: -34.200000000000216
  episodes_this_iter: 4
  episodes_total: 2414
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.082430737566257e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.1936157137155532
          entropy_coeff: 0.009999999999999998
          kl: 0.021569971903948607
          policy_loss: 0.046359714907076625
          total_loss: 1.2969444274902344
          vf_explained_var: 0.3597766160964966
          vf_loss: 1.2625208603011238
    num_agent_steps_sampled: 697000
    num_agent_steps_trained: 697000
    num_steps_sampled: 697000
    num_steps_trained: 697000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,697,19157,697000,-27.076,-19.3,-34.2,270.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 698000
  custom_metrics: {}
  date: 2021-10-29_02-25-59
  done: false
  episode_len_mean: 269.66
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.96600000000012
  episode_reward_min: -34.200000000000216
  episodes_this_iter: 4
  episodes_total: 2418
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6236461063493853e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.4593257625897726
          entropy_coeff: 0.009999999999999998
          kl: 0.009759363208051673
          policy_loss: 0.024005750608113077
          total_loss: 0.6681822896003723
          vf_explained_var: 0.6481433510780334
          vf_loss: 0.6587697906626595
    num_agent_steps_sampled: 698000
    num_agent_steps_trained: 698000
    num_steps_sampled: 698000
    num_steps_trained: 698000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,698,19183.8,698000,-26.966,-19.3,-34.2,269.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 699000
  custom_metrics: {}
  date: 2021-10-29_02-26-26
  done: false
  episode_len_mean: 268.74
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.87400000000011
  episode_reward_min: -34.200000000000216
  episodes_this_iter: 3
  episodes_total: 2421
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6236461063493853e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.719497060775757
          entropy_coeff: 0.009999999999999998
          kl: 0.025742752984537237
          policy_loss: 0.0900228770242797
          total_loss: 0.4418017038040691
          vf_explained_var: 0.8250298500061035
          vf_loss: 0.3689737978908751
    num_agent_steps_sampled: 699000
    num_agent_steps_trained: 699000
    num_steps_sampled: 699000
    num_steps_trained: 699000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,699,19211.1,699000,-26.874,-19.3,-34.2,268.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 700000
  custom_metrics: {}
  date: 2021-10-29_02-26-56
  done: false
  episode_len_mean: 267.29
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.729000000000113
  episode_reward_min: -34.200000000000216
  episodes_this_iter: 4
  episodes_total: 2425
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4354691595240786e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.07163012723128
          entropy_coeff: 0.009999999999999998
          kl: 0.016405593397749958
          policy_loss: -0.03273380829228295
          total_loss: 1.0146571907732223
          vf_explained_var: 0.47292980551719666
          vf_loss: 1.0581072926521302
    num_agent_steps_sampled: 700000
    num_agent_steps_trained: 700000
    num_steps_sampled: 700000
    num_steps_trained: 700000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,700,19240.5,700000,-26.729,-19.3,-34.2,267.29




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 701000
  custom_metrics: {}
  date: 2021-10-29_02-27-42
  done: false
  episode_len_mean: 264.48
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.448000000000107
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 5
  episodes_total: 2430
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4354691595240786e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.3077356020609538
          entropy_coeff: 0.009999999999999998
          kl: 0.0015751041299432067
          policy_loss: -0.12762138744195303
          total_loss: 1.4937432116932339
          vf_explained_var: 0.11970405280590057
          vf_loss: 1.6244419482019212
    num_agent_steps_sampled: 701000
    num_agent_steps_trained: 701000
    num_steps_sampled: 701000
    num_steps_trained: 70100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,701,19286.6,701000,-26.448,-19.3,-34.1,264.48


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 702000
  custom_metrics: {}
  date: 2021-10-29_02-28-12
  done: false
  episode_len_mean: 263.52
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.352000000000107
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 2434
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2177345797620393e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.6290124466021856
          entropy_coeff: 0.009999999999999998
          kl: 0.009346769777125669
          policy_loss: -0.03816797791255845
          total_loss: 0.8861883607175615
          vf_explained_var: 0.42087191343307495
          vf_loss: 0.9306464632352193
    num_agent_steps_sampled: 702000
    num_agent_steps_trained: 702000
    num_steps_sampled: 702000
    num_steps_trained: 702000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,702,19317.1,702000,-26.352,-19.3,-34.1,263.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 703000
  custom_metrics: {}
  date: 2021-10-29_02-28-41
  done: false
  episode_len_mean: 263.47
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.347000000000097
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 2438
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2177345797620393e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.7749473157856199
          entropy_coeff: 0.009999999999999998
          kl: 0.02255851477925369
          policy_loss: -0.09195389135016335
          total_loss: 1.0310479707188076
          vf_explained_var: 0.4887169599533081
          vf_loss: 1.130751331647237
    num_agent_steps_sampled: 703000
    num_agent_steps_trained: 703000
    num_steps_sampled: 703000
    num_steps_trained: 703000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,703,19346,703000,-26.347,-19.3,-34.1,263.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 704000
  custom_metrics: {}
  date: 2021-10-29_02-29-06
  done: false
  episode_len_mean: 263.76
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -26.376000000000104
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 3
  episodes_total: 2441
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8266018696430587e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.0325297958321042
          entropy_coeff: 0.009999999999999998
          kl: 0.016958995392147277
          policy_loss: 0.004524484690692689
          total_loss: 0.4635870185163286
          vf_explained_var: 0.7858052849769592
          vf_loss: 0.4693878370026747
    num_agent_steps_sampled: 704000
    num_agent_steps_trained: 704000
    num_steps_sampled: 704000
    num_steps_trained: 704000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,704,19371.2,704000,-26.376,-21.5,-34.1,263.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 705000
  custom_metrics: {}
  date: 2021-10-29_02-29-31
  done: false
  episode_len_mean: 263.18
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -26.3180000000001
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 4
  episodes_total: 2445
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8266018696430587e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.3816519843207464
          entropy_coeff: 0.009999999999999998
          kl: 0.011046956032867023
          policy_loss: 0.13184796157810424
          total_loss: 0.6226831158002217
          vf_explained_var: 0.755305826663971
          vf_loss: 0.5046516726414363
    num_agent_steps_sampled: 705000
    num_agent_steps_trained: 705000
    num_steps_sampled: 705000
    num_steps_trained: 705000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,705,19395.9,705000,-26.318,-21.5,-34.1,263.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 706000
  custom_metrics: {}
  date: 2021-10-29_02-29-57
  done: false
  episode_len_mean: 263.12
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -26.31200000000011
  episode_reward_min: -34.100000000000215
  episodes_this_iter: 3
  episodes_total: 2448
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8266018696430587e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.222699710395601
          entropy_coeff: 0.009999999999999998
          kl: 0.012599628918246945
          policy_loss: 0.043150424791706936
          total_loss: 0.5843078705999586
          vf_explained_var: 0.6629073023796082
          vf_loss: 0.5533844373292394
    num_agent_steps_sampled: 706000
    num_agent_steps_trained: 706000
    num_steps_sampled: 706000
    num_steps_trained: 706000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,706,19421.6,706000,-26.312,-21.5,-34.1,263.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 707000
  custom_metrics: {}
  date: 2021-10-29_02-30-21
  done: false
  episode_len_mean: 262.98
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -26.2980000000001
  episode_reward_min: -40.400000000000304
  episodes_this_iter: 3
  episodes_total: 2451
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8266018696430587e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.1874398115608427
          entropy_coeff: 0.009999999999999998
          kl: 0.015202877179393523
          policy_loss: -0.08510557562112808
          total_loss: 1.218670294351048
          vf_explained_var: 0.4573173522949219
          vf_loss: 1.3156502708792686
    num_agent_steps_sampled: 707000
    num_agent_steps_trained: 707000
    num_steps_sampled: 707000
    num_steps_trained: 707000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,707,19445.8,707000,-26.298,-21.5,-40.4,262.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 708000
  custom_metrics: {}
  date: 2021-10-29_02-30-41
  done: false
  episode_len_mean: 266.24
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -26.62400000000011
  episode_reward_min: -40.400000000000304
  episodes_this_iter: 3
  episodes_total: 2454
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8266018696430587e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.6881475925445557
          entropy_coeff: 0.009999999999999998
          kl: 0.014729776970196125
          policy_loss: 0.10266876936786705
          total_loss: 0.4636099229256312
          vf_explained_var: 0.7685598731040955
          vf_loss: 0.37782263395686944
    num_agent_steps_sampled: 708000
    num_agent_steps_trained: 708000
    num_steps_sampled: 708000
    num_steps_trained: 708000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,708,19466.3,708000,-26.624,-21.5,-40.4,266.24


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 709000
  custom_metrics: {}
  date: 2021-10-29_02-31-07
  done: false
  episode_len_mean: 266.09
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -26.60900000000011
  episode_reward_min: -40.400000000000304
  episodes_this_iter: 3
  episodes_total: 2457
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8266018696430587e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.7520109810762935
          entropy_coeff: 0.009999999999999998
          kl: 0.009999249626949652
          policy_loss: -0.14717163915435474
          total_loss: 1.354889910750919
          vf_explained_var: 0.1202242448925972
          vf_loss: 1.5095816943380567
    num_agent_steps_sampled: 709000
    num_agent_steps_trained: 709000
    num_steps_sampled: 709000
    num_steps_trained: 709000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,709,19492.2,709000,-26.609,-21.5,-40.4,266.09




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 710000
  custom_metrics: {}
  date: 2021-10-29_02-31-52
  done: false
  episode_len_mean: 266.35
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -26.635000000000108
  episode_reward_min: -40.400000000000304
  episodes_this_iter: 4
  episodes_total: 2461
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8266018696430587e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.8570945668551657
          entropy_coeff: 0.009999999999999998
          kl: 0.029030998094307727
          policy_loss: -0.018522407942348055
          total_loss: 1.0030597786108653
          vf_explained_var: 0.47635066509246826
          vf_loss: 1.0301531101473504
    num_agent_steps_sampled: 710000
    num_agent_steps_trained: 710000
    num_steps_sampled: 710000
    num_steps_trained: 71000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,710,19536.5,710000,-26.635,-18.8,-40.4,266.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 711000
  custom_metrics: {}
  date: 2021-10-29_02-32-14
  done: false
  episode_len_mean: 269.31
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -26.93100000000011
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 2464
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7399028044645875e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.7847394002808465
          entropy_coeff: 0.009999999999999998
          kl: 0.023742142576517408
          policy_loss: 0.1493403833773401
          total_loss: 0.566950862771935
          vf_explained_var: 0.7662312984466553
          vf_loss: 0.4354578781562547
    num_agent_steps_sampled: 711000
    num_agent_steps_trained: 711000
    num_steps_sampled: 711000
    num_steps_trained: 711000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,711,19558.8,711000,-26.931,-18.8,-42.5,269.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 712000
  custom_metrics: {}
  date: 2021-10-29_02-32-36
  done: false
  episode_len_mean: 271.84
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -27.18400000000012
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 2467
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.109854206696882e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.593478759792116
          entropy_coeff: 0.009999999999999998
          kl: 0.024065470537052193
          policy_loss: 0.09478696866167916
          total_loss: 0.9070772469043732
          vf_explained_var: 0.4102095365524292
          vf_loss: 0.828225069720712
    num_agent_steps_sampled: 712000
    num_agent_steps_trained: 712000
    num_steps_sampled: 712000
    num_steps_trained: 712000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,712,19580.8,712000,-27.184,-18.8,-42.5,271.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 713000
  custom_metrics: {}
  date: 2021-10-29_02-32-59
  done: false
  episode_len_mean: 273.48
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -27.34800000000012
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 2470
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.164781310045325e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.9118201110098096
          entropy_coeff: 0.009999999999999998
          kl: 0.027072492880168476
          policy_loss: -0.017727327595154444
          total_loss: 0.6774784757031335
          vf_explained_var: 0.4140690565109253
          vf_loss: 0.7143240006433593
    num_agent_steps_sampled: 713000
    num_agent_steps_trained: 713000
    num_steps_sampled: 713000
    num_steps_trained: 713000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,713,19603.8,713000,-27.348,-18.8,-42.5,273.48


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 714000
  custom_metrics: {}
  date: 2021-10-29_02-33-23
  done: false
  episode_len_mean: 274.65
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -27.465000000000114
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 2473
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.247171965067982e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.9178363111284045
          entropy_coeff: 0.009999999999999998
          kl: 0.07024706389929838
          policy_loss: 0.037411511854992974
          total_loss: 1.4226700988080767
          vf_explained_var: 0.46520915627479553
          vf_loss: 1.4044369724061754
    num_agent_steps_sampled: 714000
    num_agent_steps_trained: 714000
    num_steps_sampled: 714000
    num_steps_trained: 714000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,714,19627.3,714000,-27.465,-18.8,-42.5,274.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 715000
  custom_metrics: {}
  date: 2021-10-29_02-33-50
  done: false
  episode_len_mean: 272.26
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -27.226000000000113
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2477
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3870757947601981e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.13366472745935123
          entropy_coeff: 0.009999999999999998
          kl: 0.0021518417846746415
          policy_loss: -0.09052798201640447
          total_loss: 1.2620761341518827
          vf_explained_var: 0.3423861563205719
          vf_loss: 1.3539407624138726
    num_agent_steps_sampled: 715000
    num_agent_steps_trained: 715000
    num_steps_sampled: 715000
    num_steps_trained: 71500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,715,19654.9,715000,-27.226,-18.8,-42.5,272.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 716000
  custom_metrics: {}
  date: 2021-10-29_02-34-14
  done: false
  episode_len_mean: 274.02
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -27.402000000000115
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 2480
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.935378973800991e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.3884284092320336
          entropy_coeff: 0.009999999999999998
          kl: 0.026560567705579765
          policy_loss: -0.1285596722529994
          total_loss: 1.0574791083733242
          vf_explained_var: 0.4486473500728607
          vf_loss: 1.199923061993387
    num_agent_steps_sampled: 716000
    num_agent_steps_trained: 716000
    num_steps_sampled: 716000
    num_steps_trained: 716000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,716,19678.4,716000,-27.402,-18.8,-42.5,274.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 717000
  custom_metrics: {}
  date: 2021-10-29_02-34-38
  done: false
  episode_len_mean: 276.72
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -27.67200000000012
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2484
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0403068460701486e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.4192039026154413
          entropy_coeff: 0.009999999999999998
          kl: 0.010941492107563514
          policy_loss: -0.013106947475009494
          total_loss: 1.3067196627457938
          vf_explained_var: 0.24286781251430511
          vf_loss: 1.3340186615784964
    num_agent_steps_sampled: 717000
    num_agent_steps_trained: 717000
    num_steps_sampled: 717000
    num_steps_trained: 717000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,717,19702.4,717000,-27.672,-18.8,-42.5,276.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 718000
  custom_metrics: {}
  date: 2021-10-29_02-35-00
  done: false
  episode_len_mean: 279.58
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -27.95800000000013
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 2
  episodes_total: 2486
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0403068460701486e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.9303590708308749
          entropy_coeff: 0.009999999999999998
          kl: 0.018806662039521565
          policy_loss: -0.044992486304706994
          total_loss: 1.0999363746907975
          vf_explained_var: -0.15416201949119568
          vf_loss: 1.1642324444320467
    num_agent_steps_sampled: 718000
    num_agent_steps_trained: 718000
    num_steps_sampled: 718000
    num_steps_trained: 71800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,718,19724.1,718000,-27.958,-18.8,-42.5,279.58




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 719000
  custom_metrics: {}
  date: 2021-10-29_02-35-38
  done: false
  episode_len_mean: 282.92
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -28.29200000000013
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2490
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0403068460701486e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.867368381553226
          entropy_coeff: 0.009999999999999998
          kl: 0.026457426711308192
          policy_loss: 0.013348052402337392
          total_loss: 1.762480980820126
          vf_explained_var: 0.018673758953809738
          vf_loss: 1.7678066266907586
    num_agent_steps_sampled: 719000
    num_agent_steps_trained: 719000
    num_steps_sampled: 719000
    num_steps_trained: 719000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,719,19762.9,719000,-28.292,-18.8,-42.5,282.92


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 720000
  custom_metrics: {}
  date: 2021-10-29_02-36-05
  done: false
  episode_len_mean: 285.44
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -28.544000000000132
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 2493
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5604602691052227e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.7262882418102687
          entropy_coeff: 0.009999999999999998
          kl: 0.024563774722214857
          policy_loss: 0.024667426695426306
          total_loss: 0.8527566625012292
          vf_explained_var: -0.05355209857225418
          vf_loss: 0.8453521060239938
    num_agent_steps_sampled: 720000
    num_agent_steps_trained: 720000
    num_steps_sampled: 720000
    num_steps_trained: 72000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,720,19789.2,720000,-28.544,-18.8,-42.5,285.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 721000
  custom_metrics: {}
  date: 2021-10-29_02-36-28
  done: false
  episode_len_mean: 287.76
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -28.77600000000014
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 2496
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3406904036578343e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.997729402118259
          entropy_coeff: 0.009999999999999998
          kl: 0.02656219433240822
          policy_loss: 0.09652847233745787
          total_loss: 1.1080380217896568
          vf_explained_var: 0.2321038395166397
          vf_loss: 1.0314868279629283
    num_agent_steps_sampled: 721000
    num_agent_steps_trained: 721000
    num_steps_sampled: 721000
    num_steps_trained: 721000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,721,19812.1,721000,-28.776,-18.8,-42.5,287.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 722000
  custom_metrics: {}
  date: 2021-10-29_02-36-52
  done: false
  episode_len_mean: 289.56
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -28.95600000000014
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 2499
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5110356054867502e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.6082524216837353
          entropy_coeff: 0.009999999999999998
          kl: 0.029662862760776794
          policy_loss: -0.03162312325504091
          total_loss: 1.006266521082984
          vf_explained_var: 0.36152076721191406
          vf_loss: 1.0539721713297896
    num_agent_steps_sampled: 722000
    num_agent_steps_trained: 722000
    num_steps_sampled: 722000
    num_steps_trained: 722000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,722,19836.9,722000,-28.956,-18.8,-42.5,289.56


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 723000
  custom_metrics: {}
  date: 2021-10-29_02-37-17
  done: false
  episode_len_mean: 291.47
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -29.147000000000133
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 2502
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.266553408230128e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.9195313718583848
          entropy_coeff: 0.009999999999999998
          kl: 0.016261541755839402
          policy_loss: -0.10073140089710554
          total_loss: 1.2070756024784512
          vf_explained_var: 0.2232203185558319
          vf_loss: 1.3270023107528686
    num_agent_steps_sampled: 723000
    num_agent_steps_trained: 723000
    num_steps_sampled: 723000
    num_steps_trained: 723000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,723,19861.3,723000,-29.147,-18.8,-42.5,291.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 724000
  custom_metrics: {}
  date: 2021-10-29_02-37-46
  done: false
  episode_len_mean: 291.02
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -29.102000000000142
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2506
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.266553408230128e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.2107282310724259
          entropy_coeff: 0.009999999999999998
          kl: 0.015446798223901345
          policy_loss: -0.000409601628780365
          total_loss: 1.3930637071530023
          vf_explained_var: 0.08149179071187973
          vf_loss: 1.4055805891752242
    num_agent_steps_sampled: 724000
    num_agent_steps_trained: 724000
    num_steps_sampled: 724000
    num_steps_trained: 724000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,724,19890,724000,-29.102,-18.8,-42.5,291.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 725000
  custom_metrics: {}
  date: 2021-10-29_02-38-08
  done: false
  episode_len_mean: 291.64
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -29.164000000000147
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2510
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.266553408230128e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.8158610145250955
          entropy_coeff: 0.009999999999999998
          kl: 0.01738997799928169
          policy_loss: 0.053815952688455584
          total_loss: 1.0679543435573577
          vf_explained_var: 0.11309762299060822
          vf_loss: 1.0322970125410291
    num_agent_steps_sampled: 725000
    num_agent_steps_trained: 725000
    num_steps_sampled: 725000
    num_steps_trained: 725000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,725,19912.8,725000,-29.164,-18.8,-42.5,291.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 726000
  custom_metrics: {}
  date: 2021-10-29_02-38-36
  done: false
  episode_len_mean: 291.45
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -29.14500000000015
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2514
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.266553408230128e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.401418352127075
          entropy_coeff: 0.009999999999999998
          kl: 0.012315304449725773
          policy_loss: -0.007435048619906108
          total_loss: 1.6079155186812082
          vf_explained_var: 0.13237422704696655
          vf_loss: 1.6293647269407907
    num_agent_steps_sampled: 726000
    num_agent_steps_trained: 726000
    num_steps_sampled: 726000
    num_steps_trained: 726000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,726,19940,726000,-29.145,-18.8,-42.5,291.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 727000
  custom_metrics: {}
  date: 2021-10-29_02-39-04
  done: false
  episode_len_mean: 290.71
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -29.07100000000015
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 2517
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.266553408230128e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.4144715865453084
          entropy_coeff: 0.009999999999999998
          kl: 0.010443000164904485
          policy_loss: -0.10970206442806456
          total_loss: 1.3743317094114091
          vf_explained_var: 0.21155714988708496
          vf_loss: 1.4981784800688425
    num_agent_steps_sampled: 727000
    num_agent_steps_trained: 727000
    num_steps_sampled: 727000
    num_steps_trained: 727000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,727,19968.4,727000,-29.071,-18.8,-42.5,290.71




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 728000
  custom_metrics: {}
  date: 2021-10-29_02-39-50
  done: false
  episode_len_mean: 289.42
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -28.942000000000142
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2521
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.266553408230128e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.3854520075851016
          entropy_coeff: 0.009999999999999998
          kl: 0.028312350071471808
          policy_loss: -0.08888955091436704
          total_loss: 1.6300048761897616
          vf_explained_var: 0.3315092623233795
          vf_loss: 1.7327489376068115
    num_agent_steps_sampled: 728000
    num_agent_steps_trained: 728000
    num_steps_sampled: 728000
    num_steps_trained: 728000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,728,20014.2,728000,-28.942,-18.8,-42.5,289.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 729000
  custom_metrics: {}
  date: 2021-10-29_02-40-18
  done: false
  episode_len_mean: 290.16
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -29.01600000000014
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2525
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.89983011234519e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.68667797115114
          entropy_coeff: 0.009999999999999998
          kl: 0.05643349850065233
          policy_loss: 0.007441182931264241
          total_loss: 1.2532533744970957
          vf_explained_var: 0.2780294418334961
          vf_loss: 1.2626789702309502
    num_agent_steps_sampled: 729000
    num_agent_steps_trained: 729000
    num_steps_sampled: 729000
    num_steps_trained: 729000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,729,20042,729000,-29.016,-18.8,-42.5,290.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 730000
  custom_metrics: {}
  date: 2021-10-29_02-40-46
  done: false
  episode_len_mean: 291.17
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -29.117000000000147
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2529
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1849745168517783e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.011171978049808
          entropy_coeff: 0.009999999999999998
          kl: 0.010857414347013932
          policy_loss: -0.02576347962021828
          total_loss: 1.0483897460831537
          vf_explained_var: 0.3785545825958252
          vf_loss: 1.0842649612161848
    num_agent_steps_sampled: 730000
    num_agent_steps_trained: 730000
    num_steps_sampled: 730000
    num_steps_trained: 730000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,730,20070.8,730000,-29.117,-18.8,-42.5,291.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 731000
  custom_metrics: {}
  date: 2021-10-29_02-41-14
  done: false
  episode_len_mean: 292.1
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -29.21000000000014
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2533
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1849745168517783e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.2873027245203654
          entropy_coeff: 0.009999999999999998
          kl: 0.016989221912263337
          policy_loss: 0.05962256516019503
          total_loss: 0.897515227066146
          vf_explained_var: 0.5100459456443787
          vf_loss: 0.8507656928565767
    num_agent_steps_sampled: 731000
    num_agent_steps_trained: 731000
    num_steps_sampled: 731000
    num_steps_trained: 731000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,731,20097.9,731000,-29.21,-18.8,-42.5,292.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 732000
  custom_metrics: {}
  date: 2021-10-29_02-41-42
  done: false
  episode_len_mean: 292.17
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -29.217000000000148
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2537
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1849745168517783e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.4352478563785553
          entropy_coeff: 0.009999999999999998
          kl: 0.013248744547629738
          policy_loss: 0.028018635594182544
          total_loss: 1.124831007586585
          vf_explained_var: 0.5776640772819519
          vf_loss: 1.1111648496654298
    num_agent_steps_sampled: 732000
    num_agent_steps_trained: 732000
    num_steps_sampled: 732000
    num_steps_trained: 732000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,732,20126.2,732000,-29.217,-18.8,-42.5,292.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 733000
  custom_metrics: {}
  date: 2021-10-29_02-42-08
  done: false
  episode_len_mean: 291.92
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -29.192000000000142
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 3
  episodes_total: 2540
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1849745168517783e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.7246564957830641
          entropy_coeff: 0.009999999999999998
          kl: 0.017009814486716242
          policy_loss: 0.015260617269410028
          total_loss: 0.8066773924562666
          vf_explained_var: 0.33275461196899414
          vf_loss: 0.8086633298132154
    num_agent_steps_sampled: 733000
    num_agent_steps_trained: 733000
    num_steps_sampled: 733000
    num_steps_trained: 733000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,733,20152.2,733000,-29.192,-18.8,-42.5,291.92


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 734000
  custom_metrics: {}
  date: 2021-10-29_02-42-35
  done: false
  episode_len_mean: 290.99
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -29.09900000000014
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2544
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1849745168517783e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.341272179947959
          entropy_coeff: 0.009999999999999998
          kl: 0.012388999946344528
          policy_loss: -0.06271695097287495
          total_loss: 1.4702218075593312
          vf_explained_var: 0.245433509349823
          vf_loss: 1.5463514897558424
    num_agent_steps_sampled: 734000
    num_agent_steps_trained: 734000
    num_steps_sampled: 734000
    num_steps_trained: 734000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,734,20179,734000,-29.099,-18.8,-42.5,290.99


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 735000
  custom_metrics: {}
  date: 2021-10-29_02-43-04
  done: false
  episode_len_mean: 290.34
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -29.034000000000137
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2548
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1849745168517783e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.511811704105801
          entropy_coeff: 0.009999999999999998
          kl: 0.010548818492237584
          policy_loss: -0.02484494720896085
          total_loss: 0.6197676195038689
          vf_explained_var: 0.7617942690849304
          vf_loss: 0.6597306728363037
    num_agent_steps_sampled: 735000
    num_agent_steps_trained: 735000
    num_steps_sampled: 735000
    num_steps_trained: 735000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,735,20207.7,735000,-29.034,-18.8,-42.5,290.34




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 736000
  custom_metrics: {}
  date: 2021-10-29_02-43-48
  done: false
  episode_len_mean: 288.12
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -28.812000000000136
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2552
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1849745168517783e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.2986946947044797
          entropy_coeff: 0.009999999999999998
          kl: 0.044905100933965277
          policy_loss: 0.033215885650780465
          total_loss: 1.147258620791965
          vf_explained_var: 0.5542802810668945
          vf_loss: 1.127029698424869
    num_agent_steps_sampled: 736000
    num_agent_steps_trained: 736000
    num_steps_sampled: 736000
    num_steps_trained: 736000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,736,20251.7,736000,-28.812,-18.8,-42.5,288.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 737000
  custom_metrics: {}
  date: 2021-10-29_02-44-17
  done: false
  episode_len_mean: 284.46
  episode_media: {}
  episode_reward_max: -18.799999999999997
  episode_reward_mean: -28.44600000000013
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2556
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7774617752776676e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.0823579053084056
          entropy_coeff: 0.009999999999999998
          kl: 0.013564290917306545
          policy_loss: 0.08915193966693348
          total_loss: 0.8193003442552355
          vf_explained_var: 0.6054219007492065
          vf_loss: 0.7409719811545478
    num_agent_steps_sampled: 737000
    num_agent_steps_trained: 737000
    num_steps_sampled: 737000
    num_steps_trained: 737000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,737,20280.9,737000,-28.446,-18.8,-42.5,284.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 738000
  custom_metrics: {}
  date: 2021-10-29_02-44-46
  done: false
  episode_len_mean: 284.7
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -28.470000000000137
  episode_reward_min: -42.500000000000334
  episodes_this_iter: 4
  episodes_total: 2560
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7774617752776676e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.1030343837208219
          entropy_coeff: 0.009999999999999998
          kl: 0.009919524346731932
          policy_loss: 0.033940013953381115
          total_loss: 0.8912435789903005
          vf_explained_var: 0.5791488885879517
          vf_loss: 0.8683339092466567
    num_agent_steps_sampled: 738000
    num_agent_steps_trained: 738000
    num_steps_sampled: 738000
    num_steps_trained: 738000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,738,20309.9,738000,-28.47,-19.3,-42.5,284.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 739000
  custom_metrics: {}
  date: 2021-10-29_02-45-13
  done: false
  episode_len_mean: 281.08
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -28.108000000000125
  episode_reward_min: -42.40000000000033
  episodes_this_iter: 4
  episodes_total: 2564
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7774617752776676e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.5696299910545348
          entropy_coeff: 0.009999999999999998
          kl: 0.028342062020065742
          policy_loss: 0.02235868184102906
          total_loss: 1.101357764667935
          vf_explained_var: 0.49401745200157166
          vf_loss: 1.0946953852971395
    num_agent_steps_sampled: 739000
    num_agent_steps_trained: 739000
    num_steps_sampled: 739000
    num_steps_trained: 739000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,739,20336.6,739000,-28.108,-19.3,-42.4,281.08


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 740000
  custom_metrics: {}
  date: 2021-10-29_02-45-41
  done: false
  episode_len_mean: 278.86
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -27.886000000000127
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 2567
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6661926629165024e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.809462939368354
          entropy_coeff: 0.009999999999999998
          kl: 0.01757387528621529
          policy_loss: -0.1360578898754385
          total_loss: 0.4167508456442091
          vf_explained_var: 0.8382212519645691
          vf_loss: 0.5709033618370692
    num_agent_steps_sampled: 740000
    num_agent_steps_trained: 740000
    num_steps_sampled: 740000
    num_steps_trained: 740000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,740,20365,740000,-27.886,-19.3,-38.6,278.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 741000
  custom_metrics: {}
  date: 2021-10-29_02-46-09
  done: false
  episode_len_mean: 275.9
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -27.590000000000117
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 2571
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6661926629165024e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.6541980663935343
          entropy_coeff: 0.009999999999999998
          kl: 0.01404700680767874
          policy_loss: 0.03997680072983106
          total_loss: 0.8552693651782142
          vf_explained_var: 0.4579339623451233
          vf_loss: 0.8318345407644908
    num_agent_steps_sampled: 741000
    num_agent_steps_trained: 741000
    num_steps_sampled: 741000
    num_steps_trained: 741000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,741,20393.5,741000,-27.59,-19.3,-38.6,275.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 742000
  custom_metrics: {}
  date: 2021-10-29_02-46-37
  done: false
  episode_len_mean: 275.28
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -27.528000000000123
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 2575
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6661926629165024e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.05018925997946
          entropy_coeff: 0.009999999999999998
          kl: 0.026193180181194087
          policy_loss: -0.017261793712774913
          total_loss: 0.47304543488555484
          vf_explained_var: 0.7398533225059509
          vf_loss: 0.5008091145091587
    num_agent_steps_sampled: 742000
    num_agent_steps_trained: 742000
    num_steps_sampled: 742000
    num_steps_trained: 742000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,742,20421.4,742000,-27.528,-19.3,-38.6,275.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 743000
  custom_metrics: {}
  date: 2021-10-29_02-47-04
  done: false
  episode_len_mean: 275.75
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -27.575000000000124
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 2579
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9992889943747515e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.519704724682702
          entropy_coeff: 0.009999999999999998
          kl: 0.020564310936857153
          policy_loss: 0.07589314497179456
          total_loss: 0.7588276346524556
          vf_explained_var: 0.4623256325721741
          vf_loss: 0.6981315440601773
    num_agent_steps_sampled: 743000
    num_agent_steps_trained: 743000
    num_steps_sampled: 743000
    num_steps_trained: 743000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,743,20447.8,743000,-27.575,-19.3,-38.6,275.75




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 744000
  custom_metrics: {}
  date: 2021-10-29_02-47-48
  done: false
  episode_len_mean: 273.47
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -27.347000000000115
  episode_reward_min: -37.000000000000256
  episodes_this_iter: 4
  episodes_total: 2583
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.99893349156213e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.5329200002882215
          entropy_coeff: 0.009999999999999998
          kl: 0.06756065913952303
          policy_loss: 0.06087144480811225
          total_loss: 0.9978478729724884
          vf_explained_var: 0.32424497604370117
          vf_loss: 0.952305625544654
    num_agent_steps_sampled: 744000
    num_agent_steps_trained: 744000
    num_steps_sampled: 744000
    num_steps_trained: 744000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,744,20491.6,744000,-27.347,-19.3,-37,273.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 745000
  custom_metrics: {}
  date: 2021-10-29_02-48-14
  done: false
  episode_len_mean: 271.51
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -27.151000000000117
  episode_reward_min: -35.600000000000236
  episodes_this_iter: 3
  episodes_total: 2586
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.998400237343195e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.9239649309052362
          entropy_coeff: 0.009999999999999998
          kl: 0.021319717941029145
          policy_loss: 0.035676121049457124
          total_loss: 0.6894952707820469
          vf_explained_var: 0.1471814066171646
          vf_loss: 0.6730587910446856
    num_agent_steps_sampled: 745000
    num_agent_steps_trained: 745000
    num_steps_sampled: 745000
    num_steps_trained: 745000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,745,20517.6,745000,-27.151,-19.3,-35.6,271.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 746000
  custom_metrics: {}
  date: 2021-10-29_02-48-42
  done: false
  episode_len_mean: 268.59
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.85900000000011
  episode_reward_min: -35.600000000000236
  episodes_this_iter: 4
  episodes_total: 2590
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.349760035601479e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2638237118721007
          entropy_coeff: 0.009999999999999998
          kl: 0.019256271399314625
          policy_loss: 0.04673268480433358
          total_loss: 0.7222367432382372
          vf_explained_var: 0.5748281478881836
          vf_loss: 0.6881422893868552
    num_agent_steps_sampled: 746000
    num_agent_steps_trained: 746000
    num_steps_sampled: 746000
    num_steps_trained: 746000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,746,20545.6,746000,-26.859,-19.3,-35.6,268.59


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 747000
  custom_metrics: {}
  date: 2021-10-29_02-49-09
  done: false
  episode_len_mean: 267.1
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.710000000000107
  episode_reward_min: -35.600000000000236
  episodes_this_iter: 4
  episodes_total: 2594
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.349760035601479e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.8261724140908984
          entropy_coeff: 0.009999999999999998
          kl: 0.009101589421301091
          policy_loss: 0.014859456651740604
          total_loss: 0.935070671637853
          vf_explained_var: 0.3521689176559448
          vf_loss: 0.9384729332394071
    num_agent_steps_sampled: 747000
    num_agent_steps_trained: 747000
    num_steps_sampled: 747000
    num_steps_trained: 747000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,747,20572.6,747000,-26.71,-19.3,-35.6,267.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 748000
  custom_metrics: {}
  date: 2021-10-29_02-49-37
  done: false
  episode_len_mean: 263.74
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.374000000000105
  episode_reward_min: -33.2000000000002
  episodes_this_iter: 4
  episodes_total: 2598
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.349760035601479e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.248306668466992
          entropy_coeff: 0.009999999999999998
          kl: 0.053860612266841676
          policy_loss: 0.11312624282307095
          total_loss: 0.8763995498418808
          vf_explained_var: 0.6437444686889648
          vf_loss: 0.7757563614596923
    num_agent_steps_sampled: 748000
    num_agent_steps_trained: 748000
    num_steps_sampled: 748000
    num_steps_trained: 748000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,748,20601,748000,-26.374,-19.3,-33.2,263.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 749000
  custom_metrics: {}
  date: 2021-10-29_02-50-04
  done: false
  episode_len_mean: 263.62
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.36200000000011
  episode_reward_min: -33.2000000000002
  episodes_this_iter: 3
  episodes_total: 2601
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0246400534022192e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2957164274321662
          entropy_coeff: 0.009999999999999998
          kl: 0.06497107886964054
          policy_loss: 0.044661213788721295
          total_loss: 0.7339570850133896
          vf_explained_var: 0.6357859969139099
          vf_loss: 0.7022530357042949
    num_agent_steps_sampled: 749000
    num_agent_steps_trained: 749000
    num_steps_sampled: 749000
    num_steps_trained: 749000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,749,20628.1,749000,-26.362,-19.3,-33.2,263.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 750000
  custom_metrics: {}
  date: 2021-10-29_02-50-31
  done: false
  episode_len_mean: 263.51
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.351000000000113
  episode_reward_min: -33.2000000000002
  episodes_this_iter: 4
  episodes_total: 2605
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.036960080103328e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.3464499844445124
          entropy_coeff: 0.009999999999999998
          kl: 0.025176613259644792
          policy_loss: -0.022770032534996668
          total_loss: 1.1048579189512464
          vf_explained_var: 0.37765786051750183
          vf_loss: 1.1410924527380202
    num_agent_steps_sampled: 750000
    num_agent_steps_trained: 750000
    num_steps_sampled: 750000
    num_steps_trained: 750000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,750,20654.6,750000,-26.351,-19.3,-33.2,263.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 751000
  custom_metrics: {}
  date: 2021-10-29_02-50-57
  done: false
  episode_len_mean: 263.6
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.36000000000011
  episode_reward_min: -32.40000000000019
  episodes_this_iter: 4
  episodes_total: 2609
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5554401201549927e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.53245058854421
          entropy_coeff: 0.009999999999999998
          kl: 0.021084589757832835
          policy_loss: 0.027233585549725426
          total_loss: 0.879737169875039
          vf_explained_var: 0.3912719190120697
          vf_loss: 0.8678280909856161
    num_agent_steps_sampled: 751000
    num_agent_steps_trained: 751000
    num_steps_sampled: 751000
    num_steps_trained: 751000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,751,20680.8,751000,-26.36,-19.3,-32.4,263.6




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 752000
  custom_metrics: {}
  date: 2021-10-29_02-51-45
  done: false
  episode_len_mean: 262.24
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.224000000000107
  episode_reward_min: -32.40000000000019
  episodes_this_iter: 4
  episodes_total: 2613
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.833160180232489e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.1556567867596945
          entropy_coeff: 0.009999999999999998
          kl: 0.005930005700407687
          policy_loss: 0.040204998354117075
          total_loss: 0.8458094524012671
          vf_explained_var: 0.5296902060508728
          vf_loss: 0.8171610123581357
    num_agent_steps_sampled: 752000
    num_agent_steps_trained: 752000
    num_steps_sampled: 752000
    num_steps_trained: 752000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,752,20728.4,752000,-26.224,-19.3,-32.4,262.24


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 753000
  custom_metrics: {}
  date: 2021-10-29_02-52-09
  done: false
  episode_len_mean: 262.86
  episode_media: {}
  episode_reward_max: -19.300000000000004
  episode_reward_mean: -26.2860000000001
  episode_reward_min: -32.40000000000019
  episodes_this_iter: 3
  episodes_total: 2616
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.833160180232489e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2165153404076894
          entropy_coeff: 0.009999999999999998
          kl: 0.011022834409643778
          policy_loss: -0.10241828511158625
          total_loss: 0.8038571082883411
          vf_explained_var: 0.32139575481414795
          vf_loss: 0.9184405452675289
    num_agent_steps_sampled: 753000
    num_agent_steps_trained: 753000
    num_steps_sampled: 753000
    num_steps_trained: 753000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,753,20753.1,753000,-26.286,-19.3,-32.4,262.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 754000
  custom_metrics: {}
  date: 2021-10-29_02-52-38
  done: false
  episode_len_mean: 263.78
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -26.378000000000107
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 4
  episodes_total: 2620
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.833160180232489e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.1250926170084212
          entropy_coeff: 0.009999999999999998
          kl: 0.023748425091643415
          policy_loss: 0.012653364199731084
          total_loss: 1.1093195180098216
          vf_explained_var: 0.22579261660575867
          vf_loss: 1.1079170697265202
    num_agent_steps_sampled: 754000
    num_agent_steps_trained: 754000
    num_steps_sampled: 754000
    num_steps_trained: 754000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,754,20781.8,754000,-26.378,-20.4,-32.5,263.78


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 755000
  custom_metrics: {}
  date: 2021-10-29_02-53-06
  done: false
  episode_len_mean: 263.12
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -26.312000000000104
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 4
  episodes_total: 2624
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.024974027034873e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.1784784760740068
          entropy_coeff: 0.009999999999999998
          kl: 0.0440557882776642
          policy_loss: -0.010016344404882855
          total_loss: 0.8093663871288299
          vf_explained_var: 0.4247150421142578
          vf_loss: 0.8311675204171075
    num_agent_steps_sampled: 755000
    num_agent_steps_trained: 755000
    num_steps_sampled: 755000
    num_steps_trained: 755000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,755,20809.5,755000,-26.312,-20.4,-32.5,263.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 756000
  custom_metrics: {}
  date: 2021-10-29_02-53-33
  done: false
  episode_len_mean: 263.35
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -26.3350000000001
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 4
  episodes_total: 2628
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5374610405523096e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.4627097090085348
          entropy_coeff: 0.009999999999999998
          kl: 0.012293498199405109
          policy_loss: 0.09663406850563155
          total_loss: 0.8060871435536279
          vf_explained_var: 0.5160549879074097
          vf_loss: 0.7240801771481832
    num_agent_steps_sampled: 756000
    num_agent_steps_trained: 756000
    num_steps_sampled: 756000
    num_steps_trained: 756000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,756,20837.1,756000,-26.335,-20.4,-32.5,263.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 757000
  custom_metrics: {}
  date: 2021-10-29_02-54-00
  done: false
  episode_len_mean: 263.42
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -26.34200000000011
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 3
  episodes_total: 2631
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5374610405523096e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.418159121937222
          entropy_coeff: 0.009999999999999998
          kl: 0.025287516760356402
          policy_loss: -0.07095762516061464
          total_loss: 0.9482988946967654
          vf_explained_var: 0.3626389801502228
          vf_loss: 1.0334381037288243
    num_agent_steps_sampled: 757000
    num_agent_steps_trained: 757000
    num_steps_sampled: 757000
    num_steps_trained: 757000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,757,20863.3,757000,-26.342,-20.4,-32.5,263.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 758000
  custom_metrics: {}
  date: 2021-10-29_02-54-27
  done: false
  episode_len_mean: 264.14
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -26.414000000000105
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 4
  episodes_total: 2635
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3061915608284638e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.4522308667500814
          entropy_coeff: 0.009999999999999998
          kl: 0.028634222280701414
          policy_loss: 0.010967046353552076
          total_loss: 1.269442179467943
          vf_explained_var: 0.28293707966804504
          vf_loss: 1.272997447517183
    num_agent_steps_sampled: 758000
    num_agent_steps_trained: 758000
    num_steps_sampled: 758000
    num_steps_trained: 758000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,758,20890.7,758000,-26.414,-20.4,-32.5,264.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 759000
  custom_metrics: {}
  date: 2021-10-29_02-54-53
  done: false
  episode_len_mean: 264.61
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -26.461000000000105
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 4
  episodes_total: 2639
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.459287341242695e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.7390822543038262
          entropy_coeff: 0.009999999999999998
          kl: 0.010784014160494987
          policy_loss: 0.05830606172482173
          total_loss: 1.08055260181427
          vf_explained_var: 0.296315461397171
          vf_loss: 1.0396373669306438
    num_agent_steps_sampled: 759000
    num_agent_steps_trained: 759000
    num_steps_sampled: 759000
    num_steps_trained: 759000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,759,20916.9,759000,-26.461,-20.4,-32.5,264.61




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 760000
  custom_metrics: {}
  date: 2021-10-29_02-55-40
  done: false
  episode_len_mean: 264.54
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -26.454000000000107
  episode_reward_min: -32.50000000000019
  episodes_this_iter: 3
  episodes_total: 2642
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.459287341242695e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6293526464038426
          entropy_coeff: 0.009999999999999998
          kl: 0.018701208590782413
          policy_loss: -0.11612930819392205
          total_loss: 1.0105247497558594
          vf_explained_var: 0.20719057321548462
          vf_loss: 1.142947581079271
    num_agent_steps_sampled: 760000
    num_agent_steps_trained: 760000
    num_steps_sampled: 760000
    num_steps_trained: 760000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,760,20963.2,760000,-26.454,-20.4,-32.5,264.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 761000
  custom_metrics: {}
  date: 2021-10-29_02-56-02
  done: false
  episode_len_mean: 266.47
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -26.64700000000011
  episode_reward_min: -36.300000000000246
  episodes_this_iter: 4
  episodes_total: 2646
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.459287341242695e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.3864964657359653
          entropy_coeff: 0.009999999999999998
          kl: 0.018467515753929024
          policy_loss: -0.02957530758447117
          total_loss: 1.0305621445178985
          vf_explained_var: 0.2782529294490814
          vf_loss: 1.0740024136172401
    num_agent_steps_sampled: 761000
    num_agent_steps_trained: 761000
    num_steps_sampled: 761000
    num_steps_trained: 761000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,761,20985.3,761000,-26.647,-20.4,-36.3,266.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 762000
  custom_metrics: {}
  date: 2021-10-29_02-56-29
  done: false
  episode_len_mean: 267.16
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -26.716000000000104
  episode_reward_min: -36.300000000000246
  episodes_this_iter: 3
  episodes_total: 2649
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.459287341242695e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.428686006863912
          entropy_coeff: 0.009999999999999998
          kl: 0.010193573438396757
          policy_loss: 0.015629354367653527
          total_loss: 0.6236119755854209
          vf_explained_var: 0.5194427967071533
          vf_loss: 0.6222694876293341
    num_agent_steps_sampled: 762000
    num_agent_steps_trained: 762000
    num_steps_sampled: 762000
    num_steps_trained: 762000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,762,21012.7,762000,-26.716,-20.4,-36.3,267.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 763000
  custom_metrics: {}
  date: 2021-10-29_02-56-53
  done: false
  episode_len_mean: 268.31
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -26.83100000000011
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 2652
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.459287341242695e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.321124878194597
          entropy_coeff: 0.009999999999999998
          kl: 0.030012943439920607
          policy_loss: -0.12313275982936224
          total_loss: 0.8393153654204475
          vf_explained_var: 0.3649490475654602
          vf_loss: 0.9756593664487203
    num_agent_steps_sampled: 763000
    num_agent_steps_trained: 763000
    num_steps_sampled: 763000
    num_steps_trained: 763000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,763,21037.1,763000,-26.831,-20.4,-41.6,268.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 764000
  custom_metrics: {}
  date: 2021-10-29_02-57-19
  done: false
  episode_len_mean: 269.42
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -26.94200000000012
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 2656
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.188931011864044e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.2103697339693704
          entropy_coeff: 0.009999999999999998
          kl: 0.012205890079250676
          policy_loss: 0.021523516376813254
          total_loss: 1.1385982155799865
          vf_explained_var: 0.17142821848392487
          vf_loss: 1.1291783915625677
    num_agent_steps_sampled: 764000
    num_agent_steps_trained: 764000
    num_steps_sampled: 764000
    num_steps_trained: 764000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,764,21062.5,764000,-26.942,-20.4,-41.6,269.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 765000
  custom_metrics: {}
  date: 2021-10-29_02-57-46
  done: false
  episode_len_mean: 270.72
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.072000000000116
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 2660
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.188931011864044e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.361465675301022
          entropy_coeff: 0.009999999999999998
          kl: 0.02696776723245012
          policy_loss: 0.04597887438204554
          total_loss: 0.8049711538685693
          vf_explained_var: 0.30726081132888794
          vf_loss: 0.7726069351037343
    num_agent_steps_sampled: 765000
    num_agent_steps_trained: 765000
    num_steps_sampled: 765000
    num_steps_trained: 765000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,765,21089.5,765000,-27.072,-20.4,-41.6,270.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 766000
  custom_metrics: {}
  date: 2021-10-29_02-58-13
  done: false
  episode_len_mean: 270.72
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.072000000000116
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 2663
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.783396517796065e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.32329208056132
          entropy_coeff: 0.009999999999999998
          kl: 0.03665327564380997
          policy_loss: -0.02390741871462928
          total_loss: 0.7465054903593328
          vf_explained_var: 0.0525064580142498
          vf_loss: 0.7836458367606004
    num_agent_steps_sampled: 766000
    num_agent_steps_trained: 766000
    num_steps_sampled: 766000
    num_steps_trained: 766000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,766,21116.9,766000,-27.072,-20.4,-41.6,270.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 767000
  custom_metrics: {}
  date: 2021-10-29_02-58-42
  done: false
  episode_len_mean: 270.65
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.065000000000115
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 2667
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1675094776694101e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4405963248676723
          entropy_coeff: 0.009999999999999998
          kl: 0.04248753756457021
          policy_loss: -0.007034136603275935
          total_loss: 1.094912830988566
          vf_explained_var: 0.2977865934371948
          vf_loss: 1.1163529336452485
    num_agent_steps_sampled: 767000
    num_agent_steps_trained: 767000
    num_steps_sampled: 767000
    num_steps_trained: 767000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,767,21145.1,767000,-27.065,-20.4,-41.6,270.65




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 768000
  custom_metrics: {}
  date: 2021-10-29_02-59-23
  done: false
  episode_len_mean: 271.59
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.15900000000012
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 2671
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7512642165041148e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5918722881211176
          entropy_coeff: 0.009999999999999998
          kl: 0.018149039307961978
          policy_loss: 0.05333438325259421
          total_loss: 1.1183090130488078
          vf_explained_var: 0.4203456938266754
          vf_loss: 1.080893365210957
    num_agent_steps_sampled: 768000
    num_agent_steps_trained: 768000
    num_steps_sampled: 768000
    num_steps_trained: 768000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,768,21186.5,768000,-27.159,-20.4,-41.6,271.59


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 769000
  custom_metrics: {}
  date: 2021-10-29_02-59-44
  done: false
  episode_len_mean: 274.72
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.472000000000115
  episode_reward_min: -42.30000000000033
  episodes_this_iter: 2
  episodes_total: 2673
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7512642165041148e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6117452475759717
          entropy_coeff: 0.009999999999999998
          kl: 0.022583513932992478
          policy_loss: -0.10328198009067112
          total_loss: 0.832736376590199
          vf_explained_var: -0.22915278375148773
          vf_loss: 0.9521357945270008
    num_agent_steps_sampled: 769000
    num_agent_steps_trained: 769000
    num_steps_sampled: 769000
    num_steps_trained: 769000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,769,21207.8,769000,-27.472,-20.4,-42.3,274.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 770000
  custom_metrics: {}
  date: 2021-10-29_03-00-10
  done: false
  episode_len_mean: 275.61
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.56100000000012
  episode_reward_min: -42.30000000000033
  episodes_this_iter: 4
  episodes_total: 2677
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.626896324756172e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.876787132687039
          entropy_coeff: 0.009999999999999998
          kl: 0.017852386211902486
          policy_loss: 0.020017962654431662
          total_loss: 0.6883942537837558
          vf_explained_var: 0.3366244435310364
          vf_loss: 0.6871441615952386
    num_agent_steps_sampled: 770000
    num_agent_steps_trained: 770000
    num_steps_sampled: 770000
    num_steps_trained: 770000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,770,21233.3,770000,-27.561,-20.4,-42.3,275.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 771000
  custom_metrics: {}
  date: 2021-10-29_03-00-36
  done: false
  episode_len_mean: 275.89
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.58900000000012
  episode_reward_min: -42.30000000000033
  episodes_this_iter: 4
  episodes_total: 2681
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.626896324756172e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.686476476987203
          entropy_coeff: 0.009999999999999998
          kl: 0.013258134920554375
          policy_loss: 0.006784543146689733
          total_loss: 1.0029141969150968
          vf_explained_var: 0.23051516711711884
          vf_loss: 1.0129944185415904
    num_agent_steps_sampled: 771000
    num_agent_steps_trained: 771000
    num_steps_sampled: 771000
    num_steps_trained: 771000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,771,21259.5,771000,-27.589,-20.4,-42.3,275.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 772000
  custom_metrics: {}
  date: 2021-10-29_03-01-07
  done: false
  episode_len_mean: 275.94
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.59400000000012
  episode_reward_min: -42.30000000000033
  episodes_this_iter: 3
  episodes_total: 2684
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.626896324756172e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5171739863024818
          entropy_coeff: 0.009999999999999998
          kl: 0.020149889421007098
          policy_loss: -0.01965080565876431
          total_loss: 0.883703627023432
          vf_explained_var: -0.019656090065836906
          vf_loss: 0.9185261658496326
    num_agent_steps_sampled: 772000
    num_agent_steps_trained: 772000
    num_steps_sampled: 772000
    num_steps_trained: 772000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,772,21290.5,772000,-27.594,-20.4,-42.3,275.94


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 773000
  custom_metrics: {}
  date: 2021-10-29_03-01-34
  done: false
  episode_len_mean: 275.42
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.542000000000122
  episode_reward_min: -42.30000000000033
  episodes_this_iter: 4
  episodes_total: 2688
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.940344487134258e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7656587534480626
          entropy_coeff: 0.009999999999999998
          kl: 0.015290308953898565
          policy_loss: 0.06948497353328599
          total_loss: 1.0725057264169058
          vf_explained_var: 0.27793389558792114
          vf_loss: 1.020677341355218
    num_agent_steps_sampled: 773000
    num_agent_steps_trained: 773000
    num_steps_sampled: 773000
    num_steps_trained: 773000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,773,21317.4,773000,-27.542,-20.4,-42.3,275.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 774000
  custom_metrics: {}
  date: 2021-10-29_03-01-59
  done: false
  episode_len_mean: 276.75
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.67500000000012
  episode_reward_min: -42.30000000000033
  episodes_this_iter: 3
  episodes_total: 2691
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.940344487134258e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4803957210646734
          entropy_coeff: 0.009999999999999998
          kl: 0.013888988572723832
          policy_loss: 0.013213046971294615
          total_loss: 0.882003993458218
          vf_explained_var: 0.18194985389709473
          vf_loss: 0.8835948857996199
    num_agent_steps_sampled: 774000
    num_agent_steps_trained: 774000
    num_steps_sampled: 774000
    num_steps_trained: 774000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,774,21342.6,774000,-27.675,-20.4,-42.3,276.75


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 775000
  custom_metrics: {}
  date: 2021-10-29_03-02-28
  done: false
  episode_len_mean: 276.7
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.670000000000122
  episode_reward_min: -42.30000000000033
  episodes_this_iter: 4
  episodes_total: 2695
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.940344487134258e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.8507162743144565
          entropy_coeff: 0.009999999999999998
          kl: 0.013446387172421402
          policy_loss: -0.0005138277179665035
          total_loss: 1.3363570080863105
          vf_explained_var: 0.030102504417300224
          vf_loss: 1.3553779866960314
    num_agent_steps_sampled: 775000
    num_agent_steps_trained: 775000
    num_steps_sampled: 775000
    num_steps_trained: 775000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,775,21370.9,775000,-27.67,-20.4,-42.3,276.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 776000
  custom_metrics: {}
  date: 2021-10-29_03-02-51
  done: false
  episode_len_mean: 277.63
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.763000000000126
  episode_reward_min: -42.30000000000033
  episodes_this_iter: 3
  episodes_total: 2698
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.940344487134258e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.47626051902771
          entropy_coeff: 0.009999999999999998
          kl: 0.010605405466096663
          policy_loss: -0.09063395394219292
          total_loss: 1.382335576746199
          vf_explained_var: 0.23949532210826874
          vf_loss: 1.4877321349249946
    num_agent_steps_sampled: 776000
    num_agent_steps_trained: 776000
    num_steps_sampled: 776000
    num_steps_trained: 776000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,776,21394.1,776000,-27.763,-20.4,-42.3,277.63




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 777000
  custom_metrics: {}
  date: 2021-10-29_03-03-33
  done: false
  episode_len_mean: 279.33
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.933000000000124
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 3
  episodes_total: 2701
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.940344487134258e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.229187645514806
          entropy_coeff: 0.009999999999999998
          kl: 0.025131912865035844
          policy_loss: -0.07773798820045259
          total_loss: 1.475936812824673
          vf_explained_var: 0.2700847387313843
          vf_loss: 1.5659666763411628
    num_agent_steps_sampled: 777000
    num_agent_steps_trained: 777000
    num_steps_sampled: 777000
    num_steps_trained: 777000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,777,21436.2,777000,-27.933,-20.4,-47.5,279.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 778000
  custom_metrics: {}
  date: 2021-10-29_03-03-59
  done: false
  episode_len_mean: 279.95
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -27.99500000000013
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 4
  episodes_total: 2705
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.910516730701387e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4045981102519565
          entropy_coeff: 0.009999999999999998
          kl: 0.026350269758067195
          policy_loss: 0.0187787517077393
          total_loss: 1.3788743668132357
          vf_explained_var: 0.16156017780303955
          vf_loss: 1.3741415633095635
    num_agent_steps_sampled: 778000
    num_agent_steps_trained: 778000
    num_steps_sampled: 778000
    num_steps_trained: 778000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,778,21462.4,778000,-27.995,-20.4,-47.5,279.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 779000
  custom_metrics: {}
  date: 2021-10-29_03-04-27
  done: false
  episode_len_mean: 280.08
  episode_media: {}
  episode_reward_max: -20.40000000000002
  episode_reward_mean: -28.00800000000013
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 4
  episodes_total: 2709
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.865775096052081e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2699077367782592
          entropy_coeff: 0.009999999999999998
          kl: 0.032132495098682906
          policy_loss: -0.005955973184771008
          total_loss: 0.9945776111549801
          vf_explained_var: 0.2058698832988739
          vf_loss: 1.0132326496971977
    num_agent_steps_sampled: 779000
    num_agent_steps_trained: 779000
    num_steps_sampled: 779000
    num_steps_trained: 779000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,779,21489.8,779000,-28.008,-20.4,-47.5,280.08


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 780000
  custom_metrics: {}
  date: 2021-10-29_03-04-53
  done: false
  episode_len_mean: 280.49
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.049000000000127
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 3
  episodes_total: 2712
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3298662644078125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.4610138575236002
          entropy_coeff: 0.009999999999999998
          kl: 0.013236997123705407
          policy_loss: -0.05024157952931192
          total_loss: 0.9918888310591379
          vf_explained_var: 0.32486194372177124
          vf_loss: 1.056740548213323
    num_agent_steps_sampled: 780000
    num_agent_steps_trained: 780000
    num_steps_sampled: 780000
    num_steps_trained: 780000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,780,21516.6,780000,-28.049,-22.8,-47.5,280.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 781000
  custom_metrics: {}
  date: 2021-10-29_03-05-20
  done: false
  episode_len_mean: 280.54
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.054000000000133
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 4
  episodes_total: 2716
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3298662644078125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1217007968160841
          entropy_coeff: 0.009999999999999998
          kl: 0.013985647928898557
          policy_loss: -0.002778458636667993
          total_loss: 1.3902568161487578
          vf_explained_var: 0.16068068146705627
          vf_loss: 1.4042523039711847
    num_agent_steps_sampled: 781000
    num_agent_steps_trained: 781000
    num_steps_sampled: 781000
    num_steps_trained: 78100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,781,21543.2,781000,-28.054,-22.8,-47.5,280.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 782000
  custom_metrics: {}
  date: 2021-10-29_03-05-47
  done: false
  episode_len_mean: 280.71
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.07100000000013
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 4
  episodes_total: 2720
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3298662644078125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9476048390070597
          entropy_coeff: 0.009999999999999998
          kl: 0.023017541564383178
          policy_loss: 0.0074841202961073985
          total_loss: 1.485545767678155
          vf_explained_var: 0.2938792407512665
          vf_loss: 1.4875377019246419
    num_agent_steps_sampled: 782000
    num_agent_steps_trained: 782000
    num_steps_sampled: 782000
    num_steps_trained: 782000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,782,21570.6,782000,-28.071,-22.8,-47.5,280.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 783000
  custom_metrics: {}
  date: 2021-10-29_03-06-13
  done: false
  episode_len_mean: 280.62
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.062000000000136
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 3
  episodes_total: 2723
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.994799396611719e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0243634872966343
          entropy_coeff: 0.009999999999999998
          kl: 0.01893790484162743
          policy_loss: -0.14623320003350576
          total_loss: 0.9760750624868605
          vf_explained_var: 0.42861223220825195
          vf_loss: 1.1325518866380055
    num_agent_steps_sampled: 783000
    num_agent_steps_trained: 783000
    num_steps_sampled: 783000
    num_steps_trained: 783000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,783,21596.1,783000,-28.062,-22.8,-47.5,280.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 784000
  custom_metrics: {}
  date: 2021-10-29_03-06-40
  done: false
  episode_len_mean: 282.64
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.264000000000127
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 4
  episodes_total: 2727
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.994799396611719e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3198400252395206
          entropy_coeff: 0.009999999999999998
          kl: 0.026419466741773097
          policy_loss: -0.02329303075869878
          total_loss: 0.7549486100673676
          vf_explained_var: 0.47375768423080444
          vf_loss: 0.7914400415288078
    num_agent_steps_sampled: 784000
    num_agent_steps_trained: 784000
    num_steps_sampled: 784000
    num_steps_trained: 784000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,784,21623.5,784000,-28.264,-22.8,-47.5,282.64




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 785000
  custom_metrics: {}
  date: 2021-10-29_03-07-25
  done: false
  episode_len_mean: 282.83
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.28300000000013
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 3
  episodes_total: 2730
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9921990949175783e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.507765641477373
          entropy_coeff: 0.009999999999999998
          kl: 0.012280263922975918
          policy_loss: -0.1081898102329837
          total_loss: 0.9041883263323042
          vf_explained_var: 0.2613966763019562
          vf_loss: 1.027455789513058
    num_agent_steps_sampled: 785000
    num_agent_steps_trained: 785000
    num_steps_sampled: 785000
    num_steps_trained: 785000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,785,21668.2,785000,-28.283,-22.8,-47.5,282.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 786000
  custom_metrics: {}
  date: 2021-10-29_03-07-48
  done: false
  episode_len_mean: 283.65
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.36500000000014
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 4
  episodes_total: 2734
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9921990949175783e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.6334211892551846
          entropy_coeff: 0.009999999999999998
          kl: 0.010975832009082845
          policy_loss: 0.08431345928046438
          total_loss: 0.8671800070338779
          vf_explained_var: 0.3380310833454132
          vf_loss: 0.79920075668229
    num_agent_steps_sampled: 786000
    num_agent_steps_trained: 786000
    num_steps_sampled: 786000
    num_steps_trained: 786000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,786,21691.6,786000,-28.365,-22.8,-47.5,283.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 787000
  custom_metrics: {}
  date: 2021-10-29_03-08-11
  done: false
  episode_len_mean: 286.06
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.606000000000137
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 3
  episodes_total: 2737
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9921990949175783e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1350003480911255
          entropy_coeff: 0.009999999999999998
          kl: 0.03113787085001094
          policy_loss: 0.06974167178074518
          total_loss: 0.7223485396967994
          vf_explained_var: 0.47201284766197205
          vf_loss: 0.6639568680276473
    num_agent_steps_sampled: 787000
    num_agent_steps_trained: 787000
    num_steps_sampled: 787000
    num_steps_trained: 787000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,787,21714.3,787000,-28.606,-22.8,-47.5,286.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 788000
  custom_metrics: {}
  date: 2021-10-29_03-08-34
  done: false
  episode_len_mean: 287.7
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.77000000000014
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 2
  episodes_total: 2739
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.4882986423763664e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.29504474401474
          entropy_coeff: 0.009999999999999998
          kl: 0.06706511899898891
          policy_loss: -0.1233917858865526
          total_loss: 0.88672809501489
          vf_explained_var: 0.4406306743621826
          vf_loss: 1.0230703067448403
    num_agent_steps_sampled: 788000
    num_agent_steps_trained: 788000
    num_steps_sampled: 788000
    num_steps_trained: 788000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,788,21736.7,788000,-28.77,-22.8,-47.5,287.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 789000
  custom_metrics: {}
  date: 2021-10-29_03-08-55
  done: false
  episode_len_mean: 289.4
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.94000000000014
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 3
  episodes_total: 2742
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.732447963564551e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.21271640724606
          entropy_coeff: 0.009999999999999998
          kl: 0.02646317710718286
          policy_loss: -0.10120535418391227
          total_loss: 0.7902288089195887
          vf_explained_var: 0.41768503189086914
          vf_loss: 0.9035613215631909
    num_agent_steps_sampled: 789000
    num_agent_steps_trained: 789000
    num_steps_sampled: 789000
    num_steps_trained: 789000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,789,21758.5,789000,-28.94,-22.8,-47.5,289.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 790000
  custom_metrics: {}
  date: 2021-10-29_03-09-14
  done: false
  episode_len_mean: 292.52
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.252000000000148
  episode_reward_min: -49.40000000000043
  episodes_this_iter: 3
  episodes_total: 2745
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0098671945346824e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.5326818254258898
          entropy_coeff: 0.009999999999999998
          kl: 0.02923653169082926
          policy_loss: 0.027582881185743542
          total_loss: 0.4873445591992802
          vf_explained_var: 0.4227679371833801
          vf_loss: 0.47508847531345155
    num_agent_steps_sampled: 790000
    num_agent_steps_trained: 790000
    num_steps_sampled: 790000
    num_steps_trained: 790000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,790,21777,790000,-29.252,-22.8,-49.4,292.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 791000
  custom_metrics: {}
  date: 2021-10-29_03-09-34
  done: false
  episode_len_mean: 295.03
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.503000000000153
  episode_reward_min: -49.40000000000043
  episodes_this_iter: 2
  episodes_total: 2747
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5148007918020243e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.647900520430671
          entropy_coeff: 0.009999999999999998
          kl: 0.037211826486485855
          policy_loss: -0.11979289485348596
          total_loss: 0.24045613010724384
          vf_explained_var: 0.39501112699508667
          vf_loss: 0.3767279831071695
    num_agent_steps_sampled: 791000
    num_agent_steps_trained: 791000
    num_steps_sampled: 791000
    num_steps_trained: 791000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,791,21797.5,791000,-29.503,-22.8,-49.4,295.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 792000
  custom_metrics: {}
  date: 2021-10-29_03-09-56
  done: false
  episode_len_mean: 296.31
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.631000000000153
  episode_reward_min: -49.40000000000043
  episodes_this_iter: 3
  episodes_total: 2750
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.272201187703036e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.78296986023585
          entropy_coeff: 0.009999999999999998
          kl: 0.02528358549761644
          policy_loss: -0.09604915546046364
          total_loss: 0.5059518002801471
          vf_explained_var: 0.1585744023323059
          vf_loss: 0.6198306027385924
    num_agent_steps_sampled: 792000
    num_agent_steps_trained: 792000
    num_steps_sampled: 792000
    num_steps_trained: 792000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,792,21819.3,792000,-29.631,-22.8,-49.4,296.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 793000
  custom_metrics: {}
  date: 2021-10-29_03-10-18
  done: false
  episode_len_mean: 297.89
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.78900000000016
  episode_reward_min: -49.40000000000043
  episodes_this_iter: 3
  episodes_total: 2753
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4083017815545534e-06
          cur_lr: 5.000000000000001e-05
          entropy: 2.013084418243832
          entropy_coeff: 0.009999999999999998
          kl: 0.016613478244683223
          policy_loss: 0.026652796235349444
          total_loss: 0.6051215728123983
          vf_explained_var: -0.022284068167209625
          vf_loss: 0.5985995607657565
    num_agent_steps_sampled: 793000
    num_agent_steps_trained: 793000
    num_steps_sampled: 793000
    num_steps_trained: 793000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,793,21840.7,793000,-29.789,-22.8,-49.4,297.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 794000
  custom_metrics: {}
  date: 2021-10-29_03-10-41
  done: false
  episode_len_mean: 299.82
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.982000000000152
  episode_reward_min: -49.40000000000043
  episodes_this_iter: 3
  episodes_total: 2756
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4083017815545534e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.7539843916893005
          entropy_coeff: 0.009999999999999998
          kl: 0.025149398995176615
          policy_loss: 0.0205573418074184
          total_loss: 0.7322762800587548
          vf_explained_var: -0.11153671145439148
          vf_loss: 0.7292586973971791
    num_agent_steps_sampled: 794000
    num_agent_steps_trained: 794000
    num_steps_sampled: 794000
    num_steps_trained: 794000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,794,21863.9,794000,-29.982,-22.8,-49.4,299.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 795000
  custom_metrics: {}
  date: 2021-10-29_03-11-01
  done: false
  episode_len_mean: 302.71
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -30.271000000000157
  episode_reward_min: -49.40000000000043
  episodes_this_iter: 3
  episodes_total: 2759
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.11245267233183e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.8273555450969272
          entropy_coeff: 0.009999999999999998
          kl: 0.014206472485442765
          policy_loss: 0.045699614783128104
          total_loss: 0.84881438712279
          vf_explained_var: 0.02816113457083702
          vf_loss: 0.8213882601923412
    num_agent_steps_sampled: 795000
    num_agent_steps_trained: 795000
    num_steps_sampled: 795000
    num_steps_trained: 795000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,795,21883.9,795000,-30.271,-22.8,-49.4,302.71




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 796000
  custom_metrics: {}
  date: 2021-10-29_03-11-45
  done: false
  episode_len_mean: 302.37
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -30.237000000000158
  episode_reward_min: -49.40000000000043
  episodes_this_iter: 3
  episodes_total: 2762
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.11245267233183e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.362060308456421
          entropy_coeff: 0.009999999999999998
          kl: 0.009115381361930394
          policy_loss: -0.063612706379758
          total_loss: 0.9576964060465495
          vf_explained_var: 0.04883592575788498
          vf_loss: 1.0349296702278985
    num_agent_steps_sampled: 796000
    num_agent_steps_trained: 796000
    num_steps_sampled: 796000
    num_steps_trained: 796000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,796,21927.4,796000,-30.237,-22.8,-49.4,302.37


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 797000
  custom_metrics: {}
  date: 2021-10-29_03-12-08
  done: false
  episode_len_mean: 304.34
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -30.434000000000154
  episode_reward_min: -49.40000000000043
  episodes_this_iter: 3
  episodes_total: 2765
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.11245267233183e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3756116482946608
          entropy_coeff: 0.009999999999999998
          kl: 0.05634714310047578
          policy_loss: -0.19058853230542608
          total_loss: 0.5048271854718526
          vf_explained_var: 0.5130653977394104
          vf_loss: 0.7091715428564284
    num_agent_steps_sampled: 797000
    num_agent_steps_trained: 797000
    num_steps_sampled: 797000
    num_steps_trained: 797000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,797,21951.3,797000,-30.434,-22.8,-49.4,304.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 798000
  custom_metrics: {}
  date: 2021-10-29_03-12-31
  done: false
  episode_len_mean: 306.89
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -30.689000000000163
  episode_reward_min: -49.40000000000043
  episodes_this_iter: 3
  episodes_total: 2768
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.668679008497742e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.7216572893990412
          entropy_coeff: 0.009999999999999998
          kl: 0.030593492264465005
          policy_loss: -0.031007059580749934
          total_loss: 0.9848548061317868
          vf_explained_var: 0.15595611929893494
          vf_loss: 1.0330782016118367
    num_agent_steps_sampled: 798000
    num_agent_steps_trained: 798000
    num_steps_sampled: 798000
    num_steps_trained: 798000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,798,21973.4,798000,-30.689,-22.8,-49.4,306.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 799000
  custom_metrics: {}
  date: 2021-10-29_03-12-55
  done: false
  episode_len_mean: 308.23
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -30.823000000000174
  episode_reward_min: -49.40000000000043
  episodes_this_iter: 3
  episodes_total: 2771
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.150301851274662e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.3286405113008288
          entropy_coeff: 0.009999999999999998
          kl: 0.02158252136872297
          policy_loss: -0.052096429301632774
          total_loss: 0.8281893332799276
          vf_explained_var: 0.17425061762332916
          vf_loss: 0.8935719264878167
    num_agent_steps_sampled: 799000
    num_agent_steps_trained: 799000
    num_steps_sampled: 799000
    num_steps_trained: 799000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,799,21997.6,799000,-30.823,-24,-49.4,308.23


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 800000
  custom_metrics: {}
  date: 2021-10-29_03-13-17
  done: false
  episode_len_mean: 306.1
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -30.610000000000163
  episode_reward_min: -49.40000000000043
  episodes_this_iter: 3
  episodes_total: 2774
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.725452776911993e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.444948709011078
          entropy_coeff: 0.009999999999999998
          kl: 0.01913092193560083
          policy_loss: -0.10950107408894433
          total_loss: 0.8312491443422105
          vf_explained_var: 0.2016199380159378
          vf_loss: 0.9551993767420451
    num_agent_steps_sampled: 800000
    num_agent_steps_trained: 800000
    num_steps_sampled: 800000
    num_steps_trained: 800000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,800,22019.9,800000,-30.61,-24,-49.4,306.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 801000
  custom_metrics: {}
  date: 2021-10-29_03-13-36
  done: false
  episode_len_mean: 310.55
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -31.055000000000167
  episode_reward_min: -54.3000000000005
  episodes_this_iter: 3
  episodes_total: 2777
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.725452776911993e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.2336075941721598
          entropy_coeff: 0.009999999999999998
          kl: 0.020129449533880638
          policy_loss: 0.06068514523406823
          total_loss: 0.8881207896603478
          vf_explained_var: -0.1756659895181656
          vf_loss: 0.8397713800271353
    num_agent_steps_sampled: 801000
    num_agent_steps_trained: 801000
    num_steps_sampled: 801000
    num_steps_trained: 801000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,801,22038.9,801000,-31.055,-24,-54.3,310.55


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 802000
  custom_metrics: {}
  date: 2021-10-29_03-13-58
  done: false
  episode_len_mean: 312.63
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -31.263000000000176
  episode_reward_min: -54.3000000000005
  episodes_this_iter: 3
  episodes_total: 2780
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.588179165367989e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.459134460820092
          entropy_coeff: 0.009999999999999998
          kl: 0.013382232153743522
          policy_loss: 0.02853107253710429
          total_loss: 0.797759716047181
          vf_explained_var: -0.2588520348072052
          vf_loss: 0.7838196365369691
    num_agent_steps_sampled: 802000
    num_agent_steps_trained: 802000
    num_steps_sampled: 802000
    num_steps_trained: 802000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,802,22060.7,802000,-31.263,-24,-54.3,312.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 803000
  custom_metrics: {}
  date: 2021-10-29_03-14-26
  done: false
  episode_len_mean: 312.03
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -31.203000000000173
  episode_reward_min: -54.3000000000005
  episodes_this_iter: 3
  episodes_total: 2783
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.588179165367989e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.2170003480381435
          entropy_coeff: 0.009999999999999998
          kl: 0.013743683394389845
          policy_loss: -0.0754515278670523
          total_loss: 0.8258213559786479
          vf_explained_var: 0.25662630796432495
          vf_loss: 0.9134425388442146
    num_agent_steps_sampled: 803000
    num_agent_steps_trained: 803000
    num_steps_sampled: 803000
    num_steps_trained: 803000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,803,22088.4,803000,-31.203,-24,-54.3,312.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 804000
  custom_metrics: {}
  date: 2021-10-29_03-14-54
  done: false
  episode_len_mean: 312.16
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -31.216000000000175
  episode_reward_min: -54.3000000000005
  episodes_this_iter: 4
  episodes_total: 2787
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.588179165367989e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.2353160361448923
          entropy_coeff: 0.009999999999999998
          kl: 0.019044468500244723
          policy_loss: 0.009066223063402705
          total_loss: 0.9149996234310998
          vf_explained_var: 0.3677312135696411
          vf_loss: 0.9182860652605692
    num_agent_steps_sampled: 804000
    num_agent_steps_trained: 804000
    num_steps_sampled: 804000
    num_steps_trained: 804000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,804,22116.5,804000,-31.216,-24,-54.3,312.16




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 805000
  custom_metrics: {}
  date: 2021-10-29_03-15-33
  done: false
  episode_len_mean: 312.68
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -31.268000000000175
  episode_reward_min: -54.3000000000005
  episodes_this_iter: 3
  episodes_total: 2790
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.588179165367989e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.160918080806732
          entropy_coeff: 0.009999999999999998
          kl: 0.011102663265988532
          policy_loss: 0.01651994287967682
          total_loss: 0.6394418163432015
          vf_explained_var: 0.39084506034851074
          vf_loss: 0.6345307676328553
    num_agent_steps_sampled: 805000
    num_agent_steps_trained: 805000
    num_steps_sampled: 805000
    num_steps_trained: 805000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,805,22156.2,805000,-31.268,-24,-54.3,312.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 806000
  custom_metrics: {}
  date: 2021-10-29_03-15-59
  done: false
  episode_len_mean: 313.79
  episode_media: {}
  episode_reward_max: -24.00000000000007
  episode_reward_mean: -31.37900000000017
  episode_reward_min: -54.3000000000005
  episodes_this_iter: 4
  episodes_total: 2794
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.588179165367989e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9603519757588704
          entropy_coeff: 0.009999999999999998
          kl: 0.014891594920269528
          policy_loss: -0.01315238591697481
          total_loss: 1.1613618863953485
          vf_explained_var: 0.32257771492004395
          vf_loss: 1.1841174099180434
    num_agent_steps_sampled: 806000
    num_agent_steps_trained: 806000
    num_steps_sampled: 806000
    num_steps_trained: 806000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,806,22181.9,806000,-31.379,-24,-54.3,313.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 807000
  custom_metrics: {}
  date: 2021-10-29_03-16-23
  done: false
  episode_len_mean: 315.13
  episode_media: {}
  episode_reward_max: -23.700000000000067
  episode_reward_mean: -31.51300000000018
  episode_reward_min: -54.3000000000005
  episodes_this_iter: 3
  episodes_total: 2797
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.588179165367989e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.0524315251244438
          entropy_coeff: 0.009999999999999998
          kl: 0.009694497575398269
          policy_loss: 0.11126257793770897
          total_loss: 0.7326408543520504
          vf_explained_var: -0.04460201412439346
          vf_loss: 0.6319023377572497
    num_agent_steps_sampled: 807000
    num_agent_steps_trained: 807000
    num_steps_sampled: 807000
    num_steps_trained: 807000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,807,22205.3,807000,-31.513,-23.7,-54.3,315.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 808000
  custom_metrics: {}
  date: 2021-10-29_03-16-47
  done: false
  episode_len_mean: 314.21
  episode_media: {}
  episode_reward_max: -23.700000000000067
  episode_reward_mean: -31.421000000000177
  episode_reward_min: -54.3000000000005
  episodes_this_iter: 3
  episodes_total: 2800
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.588179165367989e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9301947918203142
          entropy_coeff: 0.009999999999999998
          kl: 0.008028061553721126
          policy_loss: 0.04403807032439444
          total_loss: 0.4173496190044615
          vf_explained_var: -0.007308045867830515
          vf_loss: 0.3826132827231454
    num_agent_steps_sampled: 808000
    num_agent_steps_trained: 808000
    num_steps_sampled: 808000
    num_steps_trained: 808000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,808,22229.6,808000,-31.421,-23.7,-54.3,314.21


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 809000
  custom_metrics: {}
  date: 2021-10-29_03-17-14
  done: false
  episode_len_mean: 313.57
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -31.357000000000177
  episode_reward_min: -54.3000000000005
  episodes_this_iter: 4
  episodes_total: 2804
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.588179165367989e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.1596096091800265
          entropy_coeff: 0.009999999999999998
          kl: 0.01259168161710199
          policy_loss: -0.023635362254248726
          total_loss: 1.0094738099310132
          vf_explained_var: 0.34107255935668945
          vf_loss: 1.0447049445576138
    num_agent_steps_sampled: 809000
    num_agent_steps_trained: 809000
    num_steps_sampled: 809000
    num_steps_trained: 809000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,809,22256.3,809000,-31.357,-22.9,-54.3,313.57


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 810000
  custom_metrics: {}
  date: 2021-10-29_03-17-36
  done: false
  episode_len_mean: 315.15
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -31.515000000000178
  episode_reward_min: -54.80000000000051
  episodes_this_iter: 3
  episodes_total: 2807
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.588179165367989e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9758186999294493
          entropy_coeff: 0.009999999999999998
          kl: 0.00842442742860167
          policy_loss: -0.0240763949851195
          total_loss: 0.9032633089356952
          vf_explained_var: -0.13737501204013824
          vf_loss: 0.9370976712968614
    num_agent_steps_sampled: 810000
    num_agent_steps_trained: 810000
    num_steps_sampled: 810000
    num_steps_trained: 810000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,810,22278.7,810000,-31.515,-22.9,-54.8,315.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 811000
  custom_metrics: {}
  date: 2021-10-29_03-17-56
  done: false
  episode_len_mean: 317.51
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -31.751000000000182
  episode_reward_min: -54.80000000000051
  episodes_this_iter: 2
  episodes_total: 2809
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.588179165367989e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.148351460032993
          entropy_coeff: 0.009999999999999998
          kl: 0.02590726894486824
          policy_loss: -0.06527363078461754
          total_loss: 0.7791517244444953
          vf_explained_var: -0.037629544734954834
          vf_loss: 0.8559082016348839
    num_agent_steps_sampled: 811000
    num_agent_steps_trained: 811000
    num_steps_sampled: 811000
    num_steps_trained: 811000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,811,22298.4,811000,-31.751,-22.9,-54.8,317.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 812000
  custom_metrics: {}
  date: 2021-10-29_03-18-11
  done: false
  episode_len_mean: 323.09
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -32.3090000000002
  episode_reward_min: -55.80000000000052
  episodes_this_iter: 2
  episodes_total: 2811
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.882268748051983e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.0750025153160094
          entropy_coeff: 0.009999999999999998
          kl: 0.011192391130904847
          policy_loss: 0.0416635983520084
          total_loss: 0.7852030485868454
          vf_explained_var: -0.16168753802776337
          vf_loss: 0.7542890326016479
    num_agent_steps_sampled: 812000
    num_agent_steps_trained: 812000
    num_steps_sampled: 812000
    num_steps_trained: 812000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,812,22313.5,812000,-32.309,-22.9,-55.8,323.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 813000
  custom_metrics: {}
  date: 2021-10-29_03-18-32
  done: false
  episode_len_mean: 325.97
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -32.59700000000019
  episode_reward_min: -55.80000000000052
  episodes_this_iter: 3
  episodes_total: 2814
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.882268748051983e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.3060125841034784
          entropy_coeff: 0.009999999999999998
          kl: 0.049329850586416826
          policy_loss: 0.06297662572728263
          total_loss: 1.2223833759625753
          vf_explained_var: -0.278274267911911
          vf_loss: 1.1724649669395553
    num_agent_steps_sampled: 813000
    num_agent_steps_trained: 813000
    num_steps_sampled: 813000
    num_steps_trained: 813000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,813,22334.7,813000,-32.597,-22.9,-55.8,325.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 814000
  custom_metrics: {}
  date: 2021-10-29_03-18-59
  done: false
  episode_len_mean: 326.18
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -32.61800000000019
  episode_reward_min: -55.80000000000052
  episodes_this_iter: 4
  episodes_total: 2818
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.823403122077974e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.432044866349962
          entropy_coeff: 0.009999999999999998
          kl: 0.030315931151307908
          policy_loss: 0.023252322607570226
          total_loss: 1.343310707145267
          vf_explained_var: 0.06574690341949463
          vf_loss: 1.3343770729170905
    num_agent_steps_sampled: 814000
    num_agent_steps_trained: 814000
    num_steps_sampled: 814000
    num_steps_trained: 814000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,814,22361.9,814000,-32.618,-22.9,-55.8,326.18




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 815000
  custom_metrics: {}
  date: 2021-10-29_03-19-35
  done: false
  episode_len_mean: 327.09
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -32.709000000000195
  episode_reward_min: -55.80000000000052
  episodes_this_iter: 2
  episodes_total: 2820
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.735104683116964e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.192084515094757
          entropy_coeff: 0.009999999999999998
          kl: 0.028838051972593665
          policy_loss: -0.0321390805972947
          total_loss: 0.8685903973049588
          vf_explained_var: 0.20894627273082733
          vf_loss: 0.912647800706327
    num_agent_steps_sampled: 815000
    num_agent_steps_trained: 815000
    num_steps_sampled: 815000
    num_steps_trained: 815000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,815,22397.7,815000,-32.709,-22.3,-55.8,327.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 816000
  custom_metrics: {}
  date: 2021-10-29_03-19-55
  done: false
  episode_len_mean: 331.71
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.171000000000205
  episode_reward_min: -57.00000000000054
  episodes_this_iter: 2
  episodes_total: 2822
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001310265702467544
          cur_lr: 5.000000000000001e-05
          entropy: 1.182227491007911
          entropy_coeff: 0.009999999999999998
          kl: 0.009670374741084073
          policy_loss: -0.06995811541047361
          total_loss: 0.6469741157359548
          vf_explained_var: -0.1600019633769989
          vf_loss: 0.7287532373848888
    num_agent_steps_sampled: 816000
    num_agent_steps_trained: 816000
    num_steps_sampled: 816000
    num_steps_trained: 816000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,816,22417.2,816000,-33.171,-22.3,-57,331.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 817000
  custom_metrics: {}
  date: 2021-10-29_03-20-17
  done: false
  episode_len_mean: 333.74
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.37400000000021
  episode_reward_min: -57.00000000000054
  episodes_this_iter: 3
  episodes_total: 2825
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001310265702467544
          cur_lr: 5.000000000000001e-05
          entropy: 1.1329614520072937
          entropy_coeff: 0.009999999999999998
          kl: 0.017890056653454108
          policy_loss: -0.048070026768578425
          total_loss: 1.2097180684407551
          vf_explained_var: 0.10015523433685303
          vf_loss: 1.2691153506437938
    num_agent_steps_sampled: 817000
    num_agent_steps_trained: 817000
    num_steps_sampled: 817000
    num_steps_trained: 817000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,817,22439.8,817000,-33.374,-22.3,-57,333.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 818000
  custom_metrics: {}
  date: 2021-10-29_03-20-39
  done: false
  episode_len_mean: 336.61
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.66100000000021
  episode_reward_min: -57.00000000000054
  episodes_this_iter: 3
  episodes_total: 2828
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001310265702467544
          cur_lr: 5.000000000000001e-05
          entropy: 1.0260994619793362
          entropy_coeff: 0.009999999999999998
          kl: 0.009482957697022915
          policy_loss: 0.03187567343314489
          total_loss: 1.0305357032352023
          vf_explained_var: -0.05053550750017166
          vf_loss: 1.0089197651379638
    num_agent_steps_sampled: 818000
    num_agent_steps_trained: 818000
    num_steps_sampled: 818000
    num_steps_trained: 818000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,818,22461.1,818000,-33.661,-22.3,-57,336.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 819000
  custom_metrics: {}
  date: 2021-10-29_03-21-04
  done: false
  episode_len_mean: 337.27
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.72700000000021
  episode_reward_min: -57.00000000000054
  episodes_this_iter: 3
  episodes_total: 2831
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001310265702467544
          cur_lr: 5.000000000000001e-05
          entropy: 1.046167231268353
          entropy_coeff: 0.009999999999999998
          kl: 0.014811387608857293
          policy_loss: -0.1373829016255008
          total_loss: 1.172104627556271
          vf_explained_var: 0.39469850063323975
          vf_loss: 1.319947263929579
    num_agent_steps_sampled: 819000
    num_agent_steps_trained: 819000
    num_steps_sampled: 819000
    num_steps_trained: 819000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,819,22486.5,819000,-33.727,-22.3,-57,337.27


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 820000
  custom_metrics: {}
  date: 2021-10-29_03-21-29
  done: false
  episode_len_mean: 335.16
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.51600000000021
  episode_reward_min: -57.00000000000054
  episodes_this_iter: 4
  episodes_total: 2835
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001310265702467544
          cur_lr: 5.000000000000001e-05
          entropy: 1.2221444182925754
          entropy_coeff: 0.009999999999999998
          kl: 0.006854028197055761
          policy_loss: 0.02150375751985444
          total_loss: 1.0592198835478888
          vf_explained_var: 0.4609018862247467
          vf_loss: 1.0499366654290094
    num_agent_steps_sampled: 820000
    num_agent_steps_trained: 820000
    num_steps_sampled: 820000
    num_steps_trained: 820000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,820,22511.9,820000,-33.516,-22.3,-57,335.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 821000
  custom_metrics: {}
  date: 2021-10-29_03-21-53
  done: false
  episode_len_mean: 335.39
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.5390000000002
  episode_reward_min: -57.00000000000054
  episodes_this_iter: 3
  episodes_total: 2838
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001310265702467544
          cur_lr: 5.000000000000001e-05
          entropy: 1.2046395182609557
          entropy_coeff: 0.009999999999999998
          kl: 0.009068345034667481
          policy_loss: 0.10907742025123703
          total_loss: 0.7379424591859182
          vf_explained_var: 0.13910913467407227
          vf_loss: 0.6409102482928171
    num_agent_steps_sampled: 821000
    num_agent_steps_trained: 821000
    num_steps_sampled: 821000
    num_steps_trained: 821000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,821,22535.3,821000,-33.539,-22.3,-57,335.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 822000
  custom_metrics: {}
  date: 2021-10-29_03-22-15
  done: false
  episode_len_mean: 335.4
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.54000000000021
  episode_reward_min: -57.00000000000054
  episodes_this_iter: 3
  episodes_total: 2841
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001310265702467544
          cur_lr: 5.000000000000001e-05
          entropy: 0.9646143628491296
          entropy_coeff: 0.009999999999999998
          kl: 0.011895470940169399
          policy_loss: 0.06083887111809519
          total_loss: 0.9716507898436653
          vf_explained_var: 0.32202497124671936
          vf_loss: 0.9204565178189013
    num_agent_steps_sampled: 822000
    num_agent_steps_trained: 822000
    num_steps_sampled: 822000
    num_steps_trained: 822000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,822,22557.7,822000,-33.54,-22.3,-57,335.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 823000
  custom_metrics: {}
  date: 2021-10-29_03-22-37
  done: false
  episode_len_mean: 335.06
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.506000000000206
  episode_reward_min: -57.00000000000054
  episodes_this_iter: 3
  episodes_total: 2844
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001310265702467544
          cur_lr: 5.000000000000001e-05
          entropy: 0.9217768099572924
          entropy_coeff: 0.009999999999999998
          kl: 0.020283967191060272
          policy_loss: -0.06468533840444353
          total_loss: 1.0050197896030215
          vf_explained_var: 0.0052848393097519875
          vf_loss: 1.0789202210803828
    num_agent_steps_sampled: 823000
    num_agent_steps_trained: 823000
    num_steps_sampled: 823000
    num_steps_trained: 823000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,823,22579.1,823000,-33.506,-22.3,-57,335.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 824000
  custom_metrics: {}
  date: 2021-10-29_03-22-58
  done: false
  episode_len_mean: 334.32
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.43200000000021
  episode_reward_min: -57.00000000000054
  episodes_this_iter: 2
  episodes_total: 2846
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019653985537013165
          cur_lr: 5.000000000000001e-05
          entropy: 1.1541949331760406
          entropy_coeff: 0.009999999999999998
          kl: 0.010731053582865385
          policy_loss: 0.08978685488303502
          total_loss: 0.4097387280729082
          vf_explained_var: -0.1298924833536148
          vf_loss: 0.3314917184412479
    num_agent_steps_sampled: 824000
    num_agent_steps_trained: 824000
    num_steps_sampled: 824000
    num_steps_trained: 824000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,824,22600.5,824000,-33.432,-22.3,-57,334.32




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 825000
  custom_metrics: {}
  date: 2021-10-29_03-23-37
  done: false
  episode_len_mean: 332.66
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.26600000000021
  episode_reward_min: -57.00000000000054
  episodes_this_iter: 4
  episodes_total: 2850
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00019653985537013165
          cur_lr: 5.000000000000001e-05
          entropy: 1.0266684578524696
          entropy_coeff: 0.009999999999999998
          kl: 0.02587301401991553
          policy_loss: 0.0031016521983676486
          total_loss: 1.412999059094323
          vf_explained_var: 0.2586238980293274
          vf_loss: 1.420158987575107
    num_agent_steps_sampled: 825000
    num_agent_steps_trained: 825000
    num_steps_sampled: 825000
    num_steps_trained: 825000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,825,22639.3,825000,-33.266,-22.3,-57,332.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 826000
  custom_metrics: {}
  date: 2021-10-29_03-23-52
  done: false
  episode_len_mean: 335.59
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.55900000000021
  episode_reward_min: -66.90000000000047
  episodes_this_iter: 1
  episodes_total: 2851
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002948097830551975
          cur_lr: 5.000000000000001e-05
          entropy: 0.7594022730986277
          entropy_coeff: 0.009999999999999998
          kl: 0.005224117863186074
          policy_loss: -0.05833003951443566
          total_loss: 0.6785798615879483
          vf_explained_var: -0.590333104133606
          vf_loss: 0.7445023744056622
    num_agent_steps_sampled: 826000
    num_agent_steps_trained: 826000
    num_steps_sampled: 826000
    num_steps_trained: 826000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,826,22654.6,826000,-33.559,-22.3,-66.9,335.59


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 827000
  custom_metrics: {}
  date: 2021-10-29_03-24-15
  done: false
  episode_len_mean: 336.89
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.689000000000206
  episode_reward_min: -66.90000000000047
  episodes_this_iter: 3
  episodes_total: 2854
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0002948097830551975
          cur_lr: 5.000000000000001e-05
          entropy: 1.2890995899836222
          entropy_coeff: 0.009999999999999998
          kl: 0.03350138108459851
          policy_loss: 0.11646272635294332
          total_loss: 0.7040730903546015
          vf_explained_var: 0.11728944629430771
          vf_loss: 0.6004914763487048
    num_agent_steps_sampled: 827000
    num_agent_steps_trained: 827000
    num_steps_sampled: 827000
    num_steps_trained: 827000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,827,22676.9,827000,-33.689,-22.3,-66.9,336.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 828000
  custom_metrics: {}
  date: 2021-10-29_03-24-38
  done: false
  episode_len_mean: 336.57
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.65700000000021
  episode_reward_min: -66.90000000000047
  episodes_this_iter: 3
  episodes_total: 2857
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004422146745827962
          cur_lr: 5.000000000000001e-05
          entropy: 1.239163774251938
          entropy_coeff: 0.009999999999999998
          kl: 0.018990250953229627
          policy_loss: 0.06118423028124703
          total_loss: 0.9910404251681434
          vf_explained_var: -0.06665181368589401
          vf_loss: 0.9422394322024451
    num_agent_steps_sampled: 828000
    num_agent_steps_trained: 828000
    num_steps_sampled: 828000
    num_steps_trained: 828000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,828,22700.2,828000,-33.657,-22.3,-66.9,336.57


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 829000
  custom_metrics: {}
  date: 2021-10-29_03-25-04
  done: false
  episode_len_mean: 334.89
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.48900000000021
  episode_reward_min: -66.90000000000047
  episodes_this_iter: 4
  episodes_total: 2861
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004422146745827962
          cur_lr: 5.000000000000001e-05
          entropy: 1.176191892226537
          entropy_coeff: 0.009999999999999998
          kl: 0.009936848841479357
          policy_loss: -0.022735717313157187
          total_loss: 1.3423044602076213
          vf_explained_var: 0.39895904064178467
          vf_loss: 1.3767977012528314
    num_agent_steps_sampled: 829000
    num_agent_steps_trained: 829000
    num_steps_sampled: 829000
    num_steps_trained: 829000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,829,22725.8,829000,-33.489,-22.3,-66.9,334.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 830000
  custom_metrics: {}
  date: 2021-10-29_03-25-24
  done: false
  episode_len_mean: 335.76
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.57600000000021
  episode_reward_min: -66.90000000000047
  episodes_this_iter: 2
  episodes_total: 2863
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004422146745827962
          cur_lr: 5.000000000000001e-05
          entropy: 1.358909969859653
          entropy_coeff: 0.009999999999999998
          kl: 0.014200783173174851
          policy_loss: -0.07083428311679098
          total_loss: 0.9224070565568077
          vf_explained_var: 0.4949107766151428
          vf_loss: 1.006824161609014
    num_agent_steps_sampled: 830000
    num_agent_steps_trained: 830000
    num_steps_sampled: 830000
    num_steps_trained: 830000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,830,22745.8,830000,-33.576,-22.3,-66.9,335.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 831000
  custom_metrics: {}
  date: 2021-10-29_03-25-44
  done: false
  episode_len_mean: 337.0
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.70000000000021
  episode_reward_min: -66.90000000000047
  episodes_this_iter: 3
  episodes_total: 2866
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004422146745827962
          cur_lr: 5.000000000000001e-05
          entropy: 1.1578595870070987
          entropy_coeff: 0.009999999999999998
          kl: 0.01585037312588536
          policy_loss: 0.048582284359468354
          total_loss: 0.653710025217798
          vf_explained_var: 0.2678070664405823
          vf_loss: 0.6166993235962258
    num_agent_steps_sampled: 831000
    num_agent_steps_trained: 831000
    num_steps_sampled: 831000
    num_steps_trained: 831000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,831,22766.5,831000,-33.7,-22.3,-66.9,337


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 832000
  custom_metrics: {}
  date: 2021-10-29_03-25-59
  done: false
  episode_len_mean: 342.14
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -34.21400000000022
  episode_reward_min: -66.90000000000047
  episodes_this_iter: 2
  episodes_total: 2868
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004422146745827962
          cur_lr: 5.000000000000001e-05
          entropy: 1.2860900547769334
          entropy_coeff: 0.009999999999999998
          kl: 0.03867372290110917
          policy_loss: 0.09235420988665687
          total_loss: 0.8943447530269623
          vf_explained_var: -0.5600740909576416
          vf_loss: 0.8148343559768465
    num_agent_steps_sampled: 832000
    num_agent_steps_trained: 832000
    num_steps_sampled: 832000
    num_steps_trained: 832000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,832,22781.1,832000,-34.214,-22.3,-66.9,342.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 833000
  custom_metrics: {}
  date: 2021-10-29_03-26-19
  done: false
  episode_len_mean: 342.26
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -34.22600000000022
  episode_reward_min: -66.90000000000047
  episodes_this_iter: 2
  episodes_total: 2870
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0006633220118741944
          cur_lr: 5.000000000000001e-05
          entropy: 1.3931975271966723
          entropy_coeff: 0.009999999999999998
          kl: 0.016869150655807984
          policy_loss: -0.08229006015592151
          total_loss: 1.2505923002958297
          vf_explained_var: 0.14339815080165863
          vf_loss: 1.346803139977985
    num_agent_steps_sampled: 833000
    num_agent_steps_trained: 833000
    num_steps_sampled: 833000
    num_steps_trained: 833000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,833,22801.2,833000,-34.226,-22.3,-66.9,342.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 834000
  custom_metrics: {}
  date: 2021-10-29_03-26-36
  done: false
  episode_len_mean: 345.51
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -34.55100000000022
  episode_reward_min: -66.90000000000047
  episodes_this_iter: 2
  episodes_total: 2872
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0006633220118741944
          cur_lr: 5.000000000000001e-05
          entropy: 0.955433601140976
          entropy_coeff: 0.009999999999999998
          kl: 0.029537486449656866
          policy_loss: -0.08379758579863442
          total_loss: 1.232291411028968
          vf_explained_var: -0.22641848027706146
          vf_loss: 1.325623745388455
    num_agent_steps_sampled: 834000
    num_agent_steps_trained: 834000
    num_steps_sampled: 834000
    num_steps_trained: 834000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,834,22818.5,834000,-34.551,-22.3,-66.9,345.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 835000
  custom_metrics: {}
  date: 2021-10-29_03-26-53
  done: false
  episode_len_mean: 349.91
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -34.99100000000023
  episode_reward_min: -66.90000000000047
  episodes_this_iter: 2
  episodes_total: 2874
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009949830178112914
          cur_lr: 5.000000000000001e-05
          entropy: 0.8650752312607235
          entropy_coeff: 0.009999999999999998
          kl: 0.011040188600065825
          policy_loss: -0.1055411375231213
          total_loss: 1.1963485906521478
          vf_explained_var: -0.14072266221046448
          vf_loss: 1.310529478225443
    num_agent_steps_sampled: 835000
    num_agent_steps_trained: 835000
    num_steps_sampled: 835000
    num_steps_trained: 835000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,835,22835.4,835000,-34.991,-22.3,-66.9,349.91


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 836000
  custom_metrics: {}
  date: 2021-10-29_03-27-10
  done: false
  episode_len_mean: 351.46
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -35.14600000000023
  episode_reward_min: -66.90000000000047
  episodes_this_iter: 2
  episodes_total: 2876
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009949830178112914
          cur_lr: 5.000000000000001e-05
          entropy: 0.981845901409785
          entropy_coeff: 0.009999999999999998
          kl: 0.005455617924213365
          policy_loss: -0.08041208568546507
          total_loss: 1.1194871508412891
          vf_explained_var: 0.0465250238776207
          vf_loss: 1.2097122720546192
    num_agent_steps_sampled: 836000
    num_agent_steps_trained: 836000
    num_steps_sampled: 836000
    num_steps_trained: 836000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,836,22852.1,836000,-35.146,-22.3,-66.9,351.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 837000
  custom_metrics: {}
  date: 2021-10-29_03-27-27
  done: false
  episode_len_mean: 351.3
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -35.13000000000023
  episode_reward_min: -66.90000000000047
  episodes_this_iter: 2
  episodes_total: 2878
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009949830178112914
          cur_lr: 5.000000000000001e-05
          entropy: 1.001186125808292
          entropy_coeff: 0.009999999999999998
          kl: 0.00777789948578105
          policy_loss: -0.06877702855401568
          total_loss: 1.2029650499423346
          vf_explained_var: -0.1632230132818222
          vf_loss: 1.2817461961673366
    num_agent_steps_sampled: 837000
    num_agent_steps_trained: 837000
    num_steps_sampled: 837000
    num_steps_trained: 837000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,837,22868.5,837000,-35.13,-22.3,-66.9,351.3




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 838000
  custom_metrics: {}
  date: 2021-10-29_03-27-57
  done: false
  episode_len_mean: 354.38
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -35.43800000000024
  episode_reward_min: -66.90000000000047
  episodes_this_iter: 2
  episodes_total: 2880
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009949830178112914
          cur_lr: 5.000000000000001e-05
          entropy: 0.8975141551759508
          entropy_coeff: 0.009999999999999998
          kl: 0.010828374733161499
          policy_loss: -0.0608798591627015
          total_loss: 1.0845909199780888
          vf_explained_var: -0.30573004484176636
          vf_loss: 1.1544351410948568
    num_agent_steps_sampled: 838000
    num_agent_steps_trained: 838000
    num_steps_sampled: 838000
    num_steps_trained: 838000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,838,22899.4,838000,-35.438,-22.3,-66.9,354.38


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 839000
  custom_metrics: {}
  date: 2021-10-29_03-28-13
  done: false
  episode_len_mean: 363.12
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -36.31200000000023
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2882
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009949830178112914
          cur_lr: 5.000000000000001e-05
          entropy: 0.5986759410964118
          entropy_coeff: 0.009999999999999998
          kl: 0.005401086260448551
          policy_loss: 0.02403978341155582
          total_loss: 0.7778918544451395
          vf_explained_var: -0.6030831336975098
          vf_loss: 0.7598334478835265
    num_agent_steps_sampled: 839000
    num_agent_steps_trained: 839000
    num_steps_sampled: 839000
    num_steps_trained: 839000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,839,22914.7,839000,-36.312,-22.3,-73.1,363.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 840000
  custom_metrics: {}
  date: 2021-10-29_03-28-28
  done: false
  episode_len_mean: 368.59
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -36.85900000000024
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2884
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009949830178112914
          cur_lr: 5.000000000000001e-05
          entropy: 0.9099000268512302
          entropy_coeff: 0.009999999999999998
          kl: 0.008837227446763546
          policy_loss: 0.1006997459464603
          total_loss: 0.7940467307964961
          vf_explained_var: -0.28126806020736694
          vf_loss: 0.7024371878761384
    num_agent_steps_sampled: 840000
    num_agent_steps_trained: 840000
    num_steps_sampled: 840000
    num_steps_trained: 840000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,840,22929.6,840000,-36.859,-22.3,-73.1,368.59


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 841000
  custom_metrics: {}
  date: 2021-10-29_03-28-44
  done: false
  episode_len_mean: 374.05
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -37.40500000000025
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2886
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009949830178112914
          cur_lr: 5.000000000000001e-05
          entropy: 1.166878409518136
          entropy_coeff: 0.009999999999999998
          kl: 0.007660943036558818
          policy_loss: 0.08887929370005926
          total_loss: 0.7872516262862418
          vf_explained_var: 0.13971802592277527
          vf_loss: 0.7100334927853611
    num_agent_steps_sampled: 841000
    num_agent_steps_trained: 841000
    num_steps_sampled: 841000
    num_steps_trained: 841000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,841,22945.5,841000,-37.405,-22.3,-73.1,374.05


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 842000
  custom_metrics: {}
  date: 2021-10-29_03-28-59
  done: false
  episode_len_mean: 377.74
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -37.77400000000026
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 1
  episodes_total: 2887
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009949830178112914
          cur_lr: 5.000000000000001e-05
          entropy: 0.772223957379659
          entropy_coeff: 0.009999999999999998
          kl: 0.006558772380642333
          policy_loss: -0.07989751005338298
          total_loss: 0.6193299157751931
          vf_explained_var: -0.6657688021659851
          vf_loss: 0.7069431300999389
    num_agent_steps_sampled: 842000
    num_agent_steps_trained: 842000
    num_steps_sampled: 842000
    num_steps_trained: 842000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,842,22960.5,842000,-37.774,-22.3,-73.1,377.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 843000
  custom_metrics: {}
  date: 2021-10-29_03-29-14
  done: false
  episode_len_mean: 381.97
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -38.19700000000026
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2889
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009949830178112914
          cur_lr: 5.000000000000001e-05
          entropy: 1.1730614470110998
          entropy_coeff: 0.009999999999999998
          kl: 0.07796861244524392
          policy_loss: -0.053877752191490595
          total_loss: 0.9115976302160157
          vf_explained_var: -0.020025737583637238
          vf_loss: 0.9771284128228823
    num_agent_steps_sampled: 843000
    num_agent_steps_trained: 843000
    num_steps_sampled: 843000
    num_steps_trained: 843000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,843,22975.7,843000,-38.197,-22.3,-73.1,381.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 844000
  custom_metrics: {}
  date: 2021-10-29_03-29-33
  done: false
  episode_len_mean: 386.63
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -38.66300000000027
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 3
  episodes_total: 2892
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0014924745267169376
          cur_lr: 5.000000000000001e-05
          entropy: 1.2533722996711731
          entropy_coeff: 0.009999999999999998
          kl: 0.04530249719556924
          policy_loss: 0.09274230980210835
          total_loss: 1.252624179257287
          vf_explained_var: -0.03025512956082821
          vf_loss: 1.172347972707616
    num_agent_steps_sampled: 844000
    num_agent_steps_trained: 844000
    num_steps_sampled: 844000
    num_steps_trained: 844000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,844,22994.5,844000,-38.663,-22.3,-73.1,386.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 845000
  custom_metrics: {}
  date: 2021-10-29_03-29-51
  done: false
  episode_len_mean: 388.99
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -38.89900000000027
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2894
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0022387117900754053
          cur_lr: 5.000000000000001e-05
          entropy: 1.14209054046207
          entropy_coeff: 0.009999999999999998
          kl: 0.01998474660272692
          policy_loss: 0.08951659732394748
          total_loss: 0.8149842699368794
          vf_explained_var: -0.004092517774552107
          vf_loss: 0.7368438410262267
    num_agent_steps_sampled: 845000
    num_agent_steps_trained: 845000
    num_steps_sampled: 845000
    num_steps_trained: 845000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,845,23013,845000,-38.899,-22.3,-73.1,388.99


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 846000
  custom_metrics: {}
  date: 2021-10-29_03-30-10
  done: false
  episode_len_mean: 392.62
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -39.26200000000028
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2896
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0022387117900754053
          cur_lr: 5.000000000000001e-05
          entropy: 0.99246736102634
          entropy_coeff: 0.009999999999999998
          kl: 0.018526869546313603
          policy_loss: -0.12774783902698092
          total_loss: 1.016099633110894
          vf_explained_var: -0.08713525533676147
          vf_loss: 1.1537306738189526
    num_agent_steps_sampled: 846000
    num_agent_steps_trained: 846000
    num_steps_sampled: 846000
    num_steps_trained: 846000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,846,23032.2,846000,-39.262,-22.3,-73.1,392.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 847000
  custom_metrics: {}
  date: 2021-10-29_03-30-34
  done: false
  episode_len_mean: 390.83
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -39.08300000000027
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 4
  episodes_total: 2900
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0022387117900754053
          cur_lr: 5.000000000000001e-05
          entropy: 1.222714853949017
          entropy_coeff: 0.009999999999999998
          kl: 0.03637073813119521
          policy_loss: -0.021974600189261968
          total_loss: 1.1017896410491732
          vf_explained_var: 0.4987741708755493
          vf_loss: 1.135909965965483
    num_agent_steps_sampled: 847000
    num_agent_steps_trained: 847000
    num_steps_sampled: 847000
    num_steps_trained: 847000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,847,23056,847000,-39.083,-22.3,-73.1,390.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 848000
  custom_metrics: {}
  date: 2021-10-29_03-30-54
  done: false
  episode_len_mean: 394.6
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -39.460000000000285
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2902
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0033580676851131088
          cur_lr: 5.000000000000001e-05
          entropy: 1.2104767163594563
          entropy_coeff: 0.009999999999999998
          kl: 0.010597139295508838
          policy_loss: 0.018062990572717454
          total_loss: 0.7271840317381753
          vf_explained_var: -0.07362334430217743
          vf_loss: 0.7211902138363157
    num_agent_steps_sampled: 848000
    num_agent_steps_trained: 848000
    num_steps_sampled: 848000
    num_steps_trained: 848000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,848,23075.7,848000,-39.46,-22.3,-73.1,394.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 849000
  custom_metrics: {}
  date: 2021-10-29_03-31-16
  done: false
  episode_len_mean: 392.81
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -39.281000000000276
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 3
  episodes_total: 2905
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0033580676851131088
          cur_lr: 5.000000000000001e-05
          entropy: 1.247587074836095
          entropy_coeff: 0.009999999999999998
          kl: 0.01949778528404909
          policy_loss: 0.080609406861994
          total_loss: 0.7374247286054824
          vf_explained_var: 0.5745813846588135
          vf_loss: 0.6692257074846162
    num_agent_steps_sampled: 849000
    num_agent_steps_trained: 849000
    num_steps_sampled: 849000
    num_steps_trained: 849000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,849,23097.9,849000,-39.281,-22.3,-73.1,392.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 850000
  custom_metrics: {}
  date: 2021-10-29_03-31-37
  done: false
  episode_len_mean: 393.81
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -39.38100000000028
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 3
  episodes_total: 2908
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0033580676851131088
          cur_lr: 5.000000000000001e-05
          entropy: 1.5705792175398932
          entropy_coeff: 0.009999999999999998
          kl: 0.027709465482567846
          policy_loss: 0.002964521116680569
          total_loss: 1.2829799354076385
          vf_explained_var: -0.1484401673078537
          vf_loss: 1.2956281570924653
    num_agent_steps_sampled: 850000
    num_agent_steps_trained: 850000
    num_steps_sampled: 850000
    num_steps_trained: 850000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,850,23119.1,850000,-39.381,-22.3,-73.1,393.81




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 851000
  custom_metrics: {}
  date: 2021-10-29_03-32-12
  done: false
  episode_len_mean: 393.95
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -39.39500000000029
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2910
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005037101527669663
          cur_lr: 5.000000000000001e-05
          entropy: 1.398544070455763
          entropy_coeff: 0.009999999999999998
          kl: 0.01421986708220244
          policy_loss: -0.1327406363354789
          total_loss: 1.2249271521965663
          vf_explained_var: 0.10316316783428192
          vf_loss: 1.3715816107061174
    num_agent_steps_sampled: 851000
    num_agent_steps_trained: 851000
    num_steps_sampled: 851000
    num_steps_trained: 851000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,851,23153.2,851000,-39.395,-22.3,-73.1,393.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 852000
  custom_metrics: {}
  date: 2021-10-29_03-32-26
  done: false
  episode_len_mean: 395.18
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -39.518000000000285
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2912
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005037101527669663
          cur_lr: 5.000000000000001e-05
          entropy: 1.0416492448912726
          entropy_coeff: 0.009999999999999998
          kl: 0.009719245287376548
          policy_loss: 0.14380671597189373
          total_loss: 0.445983988377783
          vf_explained_var: 0.49941402673721313
          vf_loss: 0.31254481089611846
    num_agent_steps_sampled: 852000
    num_agent_steps_trained: 852000
    num_steps_sampled: 852000
    num_steps_trained: 852000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,852,23167.6,852000,-39.518,-22.3,-73.1,395.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 853000
  custom_metrics: {}
  date: 2021-10-29_03-32-40
  done: false
  episode_len_mean: 400.66
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -40.06600000000029
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2914
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005037101527669663
          cur_lr: 5.000000000000001e-05
          entropy: 0.9147080183029175
          entropy_coeff: 0.009999999999999998
          kl: 0.007682915841728407
          policy_loss: 0.0821134529179997
          total_loss: 0.8368019657002554
          vf_explained_var: -0.08865275233983994
          vf_loss: 0.7637969000471962
    num_agent_steps_sampled: 853000
    num_agent_steps_trained: 853000
    num_steps_sampled: 853000
    num_steps_trained: 853000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,853,23181.6,853000,-40.066,-22.3,-73.1,400.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 854000
  custom_metrics: {}
  date: 2021-10-29_03-32-54
  done: false
  episode_len_mean: 403.94
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -40.3940000000003
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 1
  episodes_total: 2915
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005037101527669663
          cur_lr: 5.000000000000001e-05
          entropy: 0.9271099481317732
          entropy_coeff: 0.009999999999999998
          kl: 0.010978510970568796
          policy_loss: -0.05552578618129094
          total_loss: 0.6521574328343074
          vf_explained_var: 0.3112013638019562
          vf_loss: 0.7168990213217007
    num_agent_steps_sampled: 854000
    num_agent_steps_trained: 854000
    num_steps_sampled: 854000
    num_steps_trained: 854000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,854,23195.6,854000,-40.394,-22.3,-73.1,403.94


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 855000
  custom_metrics: {}
  date: 2021-10-29_03-33-08
  done: false
  episode_len_mean: 410.97
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -41.09700000000031
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2917
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005037101527669663
          cur_lr: 5.000000000000001e-05
          entropy: 0.9956090874142117
          entropy_coeff: 0.009999999999999998
          kl: 0.009660056378638001
          policy_loss: 0.06392381456163194
          total_loss: 0.6630406848258442
          vf_explained_var: -0.1570103019475937
          vf_loss: 0.6090242923340864
    num_agent_steps_sampled: 855000
    num_agent_steps_trained: 855000
    num_steps_sampled: 855000
    num_steps_trained: 855000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,855,23209.6,855000,-41.097,-22.3,-73.1,410.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 856000
  custom_metrics: {}
  date: 2021-10-29_03-33-26
  done: false
  episode_len_mean: 413.0
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -41.30000000000031
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2919
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005037101527669663
          cur_lr: 5.000000000000001e-05
          entropy: 0.9934014009104835
          entropy_coeff: 0.009999999999999998
          kl: 0.014736933261666725
          policy_loss: -0.09905511190493901
          total_loss: 1.5402527974711524
          vf_explained_var: -0.23558972775936127
          vf_loss: 1.6491676663359007
    num_agent_steps_sampled: 856000
    num_agent_steps_trained: 856000
    num_steps_sampled: 856000
    num_steps_trained: 856000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,856,23227.7,856000,-41.3,-22.3,-73.1,413


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 857000
  custom_metrics: {}
  date: 2021-10-29_03-33-43
  done: false
  episode_len_mean: 413.75
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -41.3750000000003
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2921
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005037101527669663
          cur_lr: 5.000000000000001e-05
          entropy: 0.9901230620013343
          entropy_coeff: 0.009999999999999998
          kl: 0.010343994560409402
          policy_loss: -0.08784347540802426
          total_loss: 1.4463384598493576
          vf_explained_var: 0.015623011626303196
          vf_loss: 1.5440310680203968
    num_agent_steps_sampled: 857000
    num_agent_steps_trained: 857000
    num_steps_sampled: 857000
    num_steps_trained: 857000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,857,23244.9,857000,-41.375,-22.9,-73.1,413.75


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 858000
  custom_metrics: {}
  date: 2021-10-29_03-34-04
  done: false
  episode_len_mean: 414.7
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -41.47000000000031
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 3
  episodes_total: 2924
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005037101527669663
          cur_lr: 5.000000000000001e-05
          entropy: 1.419030827946133
          entropy_coeff: 0.009999999999999998
          kl: 0.02132751082103697
          policy_loss: -0.0022864061925146313
          total_loss: 1.4866622110207877
          vf_explained_var: -0.0933125913143158
          vf_loss: 1.50303149720033
    num_agent_steps_sampled: 858000
    num_agent_steps_trained: 858000
    num_steps_sampled: 858000
    num_steps_trained: 858000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,858,23265.3,858000,-41.47,-22.9,-73.1,414.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 859000
  custom_metrics: {}
  date: 2021-10-29_03-34-28
  done: false
  episode_len_mean: 413.77
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -41.3770000000003
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 3
  episodes_total: 2927
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007555652291504495
          cur_lr: 5.000000000000001e-05
          entropy: 1.3712314811017778
          entropy_coeff: 0.009999999999999998
          kl: 0.02757946018296451
          policy_loss: 0.0687533039185736
          total_loss: 1.1057918866475422
          vf_explained_var: 0.4005323648452759
          vf_loss: 1.0505425161785549
    num_agent_steps_sampled: 859000
    num_agent_steps_trained: 859000
    num_steps_sampled: 859000
    num_steps_trained: 859000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,859,23289.4,859000,-41.377,-22.9,-73.1,413.77


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 860000
  custom_metrics: {}
  date: 2021-10-29_03-34-49
  done: false
  episode_len_mean: 414.97
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -41.49700000000032
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 3
  episodes_total: 2930
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01133347843725674
          cur_lr: 5.000000000000001e-05
          entropy: 1.3418910649087694
          entropy_coeff: 0.009999999999999998
          kl: 0.0188707307402454
          policy_loss: 0.07293685326973597
          total_loss: 1.4143561992380353
          vf_explained_var: -0.23787401616573334
          vf_loss: 1.3546243907262883
    num_agent_steps_sampled: 860000
    num_agent_steps_trained: 860000
    num_steps_sampled: 860000
    num_steps_trained: 860000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,860,23310.3,860000,-41.497,-22.9,-73.1,414.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 861000
  custom_metrics: {}
  date: 2021-10-29_03-35-10
  done: false
  episode_len_mean: 416.72
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -41.67200000000031
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2932
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01133347843725674
          cur_lr: 5.000000000000001e-05
          entropy: 1.2111337873670789
          entropy_coeff: 0.009999999999999998
          kl: 0.028291261266955636
          policy_loss: -0.10191689381996791
          total_loss: 1.448431423968739
          vf_explained_var: -0.21294675767421722
          vf_loss: 1.5621390175488261
    num_agent_steps_sampled: 861000
    num_agent_steps_trained: 861000
    num_steps_sampled: 861000
    num_steps_trained: 861000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,861,23331.8,861000,-41.672,-22.9,-73.1,416.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 862000
  custom_metrics: {}
  date: 2021-10-29_03-35-34
  done: false
  episode_len_mean: 418.01
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -41.801000000000315
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 4
  episodes_total: 2936
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01700021765588511
          cur_lr: 5.000000000000001e-05
          entropy: 1.322375570403205
          entropy_coeff: 0.009999999999999998
          kl: 0.04474694873807334
          policy_loss: 0.041371948685910966
          total_loss: 1.2066181533866458
          vf_explained_var: 0.5104620456695557
          vf_loss: 1.177709254788028
    num_agent_steps_sampled: 862000
    num_agent_steps_trained: 862000
    num_steps_sampled: 862000
    num_steps_trained: 862000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,862,23355,862000,-41.801,-22.9,-73.1,418.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 863000
  custom_metrics: {}
  date: 2021-10-29_03-35-55
  done: false
  episode_len_mean: 418.21
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -41.82100000000031
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2938
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.025500326483827666
          cur_lr: 5.000000000000001e-05
          entropy: 1.3978556434313456
          entropy_coeff: 0.009999999999999998
          kl: 0.027273368271460048
          policy_loss: -0.10414128121402529
          total_loss: 1.4056878334946104
          vf_explained_var: -0.2379053384065628
          vf_loss: 1.523112194819583
    num_agent_steps_sampled: 863000
    num_agent_steps_trained: 863000
    num_steps_sampled: 863000
    num_steps_trained: 863000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,863,23376.8,863000,-41.821,-22.9,-73.1,418.21




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 864000
  custom_metrics: {}
  date: 2021-10-29_03-36-30
  done: false
  episode_len_mean: 419.78
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -41.97800000000032
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 3
  episodes_total: 2941
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.038250489725741514
          cur_lr: 5.000000000000001e-05
          entropy: 1.2242855959468417
          entropy_coeff: 0.009999999999999998
          kl: 0.03740999756537317
          policy_loss: 0.0852821409702301
          total_loss: 0.891891614596049
          vf_explained_var: 0.21792759001255035
          vf_loss: 0.8174213847352398
    num_agent_steps_sampled: 864000
    num_agent_steps_trained: 864000
    num_steps_sampled: 864000
    num_steps_trained: 864000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,864,23411.6,864000,-41.978,-22.9,-73.1,419.78


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 865000
  custom_metrics: {}
  date: 2021-10-29_03-36-48
  done: false
  episode_len_mean: 420.44
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -42.044000000000324
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2943
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05737573458861226
          cur_lr: 5.000000000000001e-05
          entropy: 1.2068589899275037
          entropy_coeff: 0.009999999999999998
          kl: 0.013097382118131091
          policy_loss: -0.05437934630446964
          total_loss: 1.3899619433614943
          vf_explained_var: 0.3164213001728058
          vf_loss: 1.4556584349522987
    num_agent_steps_sampled: 865000
    num_agent_steps_trained: 865000
    num_steps_sampled: 865000
    num_steps_trained: 865000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,865,23429.3,865000,-42.044,-22.9,-73.1,420.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 866000
  custom_metrics: {}
  date: 2021-10-29_03-37-05
  done: false
  episode_len_mean: 421.99
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -42.199000000000325
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2945
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05737573458861226
          cur_lr: 5.000000000000001e-05
          entropy: 1.213153843084971
          entropy_coeff: 0.009999999999999998
          kl: 0.008762761635822534
          policy_loss: -0.0737508696814378
          total_loss: 1.172573778198825
          vf_explained_var: 0.16042359173297882
          vf_loss: 1.257953426366051
    num_agent_steps_sampled: 866000
    num_agent_steps_trained: 866000
    num_steps_sampled: 866000
    num_steps_trained: 866000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,866,23446.5,866000,-42.199,-22.9,-73.1,421.99


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 867000
  custom_metrics: {}
  date: 2021-10-29_03-37-24
  done: false
  episode_len_mean: 425.7
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -42.57000000000033
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 3
  episodes_total: 2948
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05737573458861226
          cur_lr: 5.000000000000001e-05
          entropy: 1.467228036456638
          entropy_coeff: 0.009999999999999998
          kl: 0.02617543313581968
          policy_loss: -0.02782397104634179
          total_loss: 1.4041827827692033
          vf_explained_var: 0.3853324055671692
          vf_loss: 1.4451771842315793
    num_agent_steps_sampled: 867000
    num_agent_steps_trained: 867000
    num_steps_sampled: 867000
    num_steps_trained: 867000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,867,23465.5,867000,-42.57,-22.9,-73.1,425.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 868000
  custom_metrics: {}
  date: 2021-10-29_03-37-44
  done: false
  episode_len_mean: 426.84
  episode_media: {}
  episode_reward_max: -25.30000000000009
  episode_reward_mean: -42.684000000000324
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 2
  episodes_total: 2950
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08606360188291838
          cur_lr: 5.000000000000001e-05
          entropy: 1.3117578797870213
          entropy_coeff: 0.009999999999999998
          kl: 0.029393814341927855
          policy_loss: -0.06808916678031286
          total_loss: 1.0957612054215538
          vf_explained_var: -0.23080962896347046
          vf_loss: 1.174438215047121
    num_agent_steps_sampled: 868000
    num_agent_steps_trained: 868000
    num_steps_sampled: 868000
    num_steps_trained: 868000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,868,23485.4,868000,-42.684,-25.3,-73.1,426.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 869000
  custom_metrics: {}
  date: 2021-10-29_03-38-11
  done: false
  episode_len_mean: 419.74
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -41.974000000000316
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 4
  episodes_total: 2954
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1290954028243776
          cur_lr: 5.000000000000001e-05
          entropy: 1.469214019510481
          entropy_coeff: 0.009999999999999998
          kl: 0.026192430070988947
          policy_loss: -0.0001675594598054886
          total_loss: 1.561496865749359
          vf_explained_var: 0.2668813467025757
          vf_loss: 1.572975250085195
    num_agent_steps_sampled: 869000
    num_agent_steps_trained: 869000
    num_steps_sampled: 869000
    num_steps_trained: 869000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,869,23512.3,869000,-41.974,-24.6,-73.1,419.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 870000
  custom_metrics: {}
  date: 2021-10-29_03-38-40
  done: false
  episode_len_mean: 417.47
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -41.74700000000032
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 4
  episodes_total: 2958
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.5280885749393038
          entropy_coeff: 0.009999999999999998
          kl: 0.014696331562263588
          policy_loss: 0.03833344686362478
          total_loss: 1.0066898690329658
          vf_explained_var: 0.7074921131134033
          vf_loss: 0.9807914568318261
    num_agent_steps_sampled: 870000
    num_agent_steps_trained: 870000
    num_steps_sampled: 870000
    num_steps_trained: 870000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,870,23540.9,870000,-41.747,-24.6,-73.1,417.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 871000
  custom_metrics: {}
  date: 2021-10-29_03-39-07
  done: false
  episode_len_mean: 416.49
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -41.64900000000031
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 4
  episodes_total: 2962
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.1989332503742642
          entropy_coeff: 0.009999999999999998
          kl: 0.018714298164273557
          policy_loss: 0.03321798046429952
          total_loss: 0.7335301299889883
          vf_explained_var: 0.6356649994850159
          vf_loss: 0.7086775895622042
    num_agent_steps_sampled: 871000
    num_agent_steps_trained: 871000
    num_steps_sampled: 871000
    num_steps_trained: 871000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,871,23568.4,871000,-41.649,-24.6,-73.1,416.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 872000
  custom_metrics: {}
  date: 2021-10-29_03-39-35
  done: false
  episode_len_mean: 411.5
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -41.1500000000003
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 4
  episodes_total: 2966
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1936431042365664
          cur_lr: 5.000000000000001e-05
          entropy: 1.126654436190923
          entropy_coeff: 0.009999999999999998
          kl: 0.03133252924023766
          policy_loss: -0.05667147586743037
          total_loss: 0.8013877232869466
          vf_explained_var: 0.4744674563407898
          vf_loss: 0.8632584194342295
    num_agent_steps_sampled: 872000
    num_agent_steps_trained: 872000
    num_steps_sampled: 872000
    num_steps_trained: 872000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,872,23595.9,872000,-41.15,-23.3,-73.1,411.5




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 873000
  custom_metrics: {}
  date: 2021-10-29_03-40-20
  done: false
  episode_len_mean: 402.6
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -40.26000000000029
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 4
  episodes_total: 2970
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2904646563548496
          cur_lr: 5.000000000000001e-05
          entropy: 1.5037430816226536
          entropy_coeff: 0.009999999999999998
          kl: 0.018294381023910233
          policy_loss: -0.0968085863524013
          total_loss: 0.9572183529535929
          vf_explained_var: 0.31273800134658813
          vf_loss: 1.0637505014737447
    num_agent_steps_sampled: 873000
    num_agent_steps_trained: 873000
    num_steps_sampled: 873000
    num_steps_trained: 873000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,873,23641.1,873000,-40.26,-23.3,-73.1,402.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 874000
  custom_metrics: {}
  date: 2021-10-29_03-40-49
  done: false
  episode_len_mean: 396.42
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -39.642000000000294
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 3
  episodes_total: 2973
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2904646563548496
          cur_lr: 5.000000000000001e-05
          entropy: 1.3063026971287197
          entropy_coeff: 0.009999999999999998
          kl: 0.033364659573397085
          policy_loss: -0.038609672917260064
          total_loss: 0.8052204489707947
          vf_explained_var: 0.5440785884857178
          vf_loss: 0.8472018932716714
    num_agent_steps_sampled: 874000
    num_agent_steps_trained: 874000
    num_steps_sampled: 874000
    num_steps_trained: 874000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,874,23669.9,874000,-39.642,-23.3,-73.1,396.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 875000
  custom_metrics: {}
  date: 2021-10-29_03-41-14
  done: false
  episode_len_mean: 389.06
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -38.906000000000276
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 4
  episodes_total: 2977
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4356969845322743
          cur_lr: 5.000000000000001e-05
          entropy: 1.2860555423630609
          entropy_coeff: 0.009999999999999998
          kl: 0.005932507179287485
          policy_loss: 0.08433518244160546
          total_loss: 0.6827295402685801
          vf_explained_var: 0.6317906975746155
          vf_loss: 0.6086701366636488
    num_agent_steps_sampled: 875000
    num_agent_steps_trained: 875000
    num_steps_sampled: 875000
    num_steps_trained: 875000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,875,23694.8,875000,-38.906,-23.3,-73.1,389.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 876000
  custom_metrics: {}
  date: 2021-10-29_03-41-37
  done: false
  episode_len_mean: 384.46
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -38.44600000000027
  episode_reward_min: -73.10000000000012
  episodes_this_iter: 3
  episodes_total: 2980
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4356969845322743
          cur_lr: 5.000000000000001e-05
          entropy: 1.3381093343098958
          entropy_coeff: 0.009999999999999998
          kl: 0.009566709730366874
          policy_loss: 0.09863248012132114
          total_loss: 0.5751523405313492
          vf_explained_var: 0.6197749376296997
          vf_loss: 0.48573276640640367
    num_agent_steps_sampled: 876000
    num_agent_steps_trained: 876000
    num_steps_sampled: 876000
    num_steps_trained: 876000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,876,23718.5,876000,-38.446,-23.3,-73.1,384.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 877000
  custom_metrics: {}
  date: 2021-10-29_03-42-04
  done: false
  episode_len_mean: 372.91
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -37.29100000000026
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 3
  episodes_total: 2983
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4356969845322743
          cur_lr: 5.000000000000001e-05
          entropy: 1.28515006436242
          entropy_coeff: 0.009999999999999998
          kl: 0.003934426130232459
          policy_loss: -0.134405230730772
          total_loss: 0.8705953664249844
          vf_explained_var: 0.3957294821739197
          vf_loss: 1.0161378886964587
    num_agent_steps_sampled: 877000
    num_agent_steps_trained: 877000
    num_steps_sampled: 877000
    num_steps_trained: 877000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,877,23744.6,877000,-37.291,-23.3,-63.6,372.91


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 878000
  custom_metrics: {}
  date: 2021-10-29_03-42-33
  done: false
  episode_len_mean: 359.11
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -35.911000000000236
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 5
  episodes_total: 2988
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21784849226613714
          cur_lr: 5.000000000000001e-05
          entropy: 1.3709946235020956
          entropy_coeff: 0.009999999999999998
          kl: 0.009204180777400555
          policy_loss: -0.02884783728255166
          total_loss: 1.2037634114424387
          vf_explained_var: 0.4242500364780426
          vf_loss: 1.244316073258718
    num_agent_steps_sampled: 878000
    num_agent_steps_trained: 878000
    num_steps_sampled: 878000
    num_steps_trained: 878000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,878,23774.5,878000,-35.911,-23.3,-63.6,359.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 879000
  custom_metrics: {}
  date: 2021-10-29_03-43-00
  done: false
  episode_len_mean: 353.75
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -35.37500000000023
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 3
  episodes_total: 2991
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21784849226613714
          cur_lr: 5.000000000000001e-05
          entropy: 1.3665470288859474
          entropy_coeff: 0.009999999999999998
          kl: 0.020162109774090373
          policy_loss: 0.014908325672149659
          total_loss: 0.6103057132826911
          vf_explained_var: 0.5547458529472351
          vf_loss: 0.6046705746402343
    num_agent_steps_sampled: 879000
    num_agent_steps_trained: 879000
    num_steps_sampled: 879000
    num_steps_trained: 879000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,879,23800.9,879000,-35.375,-23.3,-63.6,353.75


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 880000
  custom_metrics: {}
  date: 2021-10-29_03-43-26
  done: false
  episode_len_mean: 347.7
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -34.77000000000023
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 4
  episodes_total: 2995
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32677273839920584
          cur_lr: 5.000000000000001e-05
          entropy: 1.2877966576152378
          entropy_coeff: 0.009999999999999998
          kl: 0.029832347819668963
          policy_loss: -0.0091385242011812
          total_loss: 0.9986013505193923
          vf_explained_var: 0.4786444902420044
          vf_loss: 1.0108694321579403
    num_agent_steps_sampled: 880000
    num_agent_steps_trained: 880000
    num_steps_sampled: 880000
    num_steps_trained: 880000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,880,23826.6,880000,-34.77,-23.3,-63.6,347.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 881000
  custom_metrics: {}
  date: 2021-10-29_03-43-54
  done: false
  episode_len_mean: 342.98
  episode_media: {}
  episode_reward_max: -23.30000000000006
  episode_reward_mean: -34.29800000000022
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 4
  episodes_total: 2999
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.49015910759880854
          cur_lr: 5.000000000000001e-05
          entropy: 1.3768917904959785
          entropy_coeff: 0.009999999999999998
          kl: 0.010657185851802906
          policy_loss: -0.022369483196073108
          total_loss: 1.0586634079615276
          vf_explained_var: 0.37268298864364624
          vf_loss: 1.0895780934227837
    num_agent_steps_sampled: 881000
    num_agent_steps_trained: 881000
    num_steps_sampled: 881000
    num_steps_trained: 881000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,881,23855.4,881000,-34.298,-23.3,-63.6,342.98




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 882000
  custom_metrics: {}
  date: 2021-10-29_03-44-41
  done: false
  episode_len_mean: 338.69
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -33.869000000000206
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 4
  episodes_total: 3003
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.49015910759880854
          cur_lr: 5.000000000000001e-05
          entropy: 1.6070487631691828
          entropy_coeff: 0.009999999999999998
          kl: 0.014355516363918103
          policy_loss: -0.04933587966693772
          total_loss: 1.1827310734324985
          vf_explained_var: 0.2907002866268158
          vf_loss: 1.241100949048996
    num_agent_steps_sampled: 882000
    num_agent_steps_trained: 882000
    num_steps_sampled: 882000
    num_steps_trained: 882000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,882,23902.2,882000,-33.869,-22.7,-63.6,338.69


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 883000
  custom_metrics: {}
  date: 2021-10-29_03-45-08
  done: false
  episode_len_mean: 336.4
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -33.64000000000021
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 3
  episodes_total: 3006
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.49015910759880854
          cur_lr: 5.000000000000001e-05
          entropy: 1.2257015738222334
          entropy_coeff: 0.009999999999999998
          kl: 0.01242259760168878
          policy_loss: 0.009328995727830462
          total_loss: 0.7614013797707028
          vf_explained_var: 0.5763131976127625
          vf_loss: 0.7582403513292472
    num_agent_steps_sampled: 883000
    num_agent_steps_trained: 883000
    num_steps_sampled: 883000
    num_steps_trained: 883000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,883,23929.2,883000,-33.64,-22.7,-63.6,336.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 884000
  custom_metrics: {}
  date: 2021-10-29_03-45-36
  done: false
  episode_len_mean: 332.7
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -33.27000000000021
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 4
  episodes_total: 3010
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.49015910759880854
          cur_lr: 5.000000000000001e-05
          entropy: 1.2977392607265048
          entropy_coeff: 0.009999999999999998
          kl: 0.014018081997048014
          policy_loss: -0.004186260534657372
          total_loss: 0.9949213359091017
          vf_explained_var: 0.48936980962753296
          vf_loss: 1.005213909678989
    num_agent_steps_sampled: 884000
    num_agent_steps_trained: 884000
    num_steps_sampled: 884000
    num_steps_trained: 884000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,884,23956.8,884000,-33.27,-22.7,-63.6,332.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 885000
  custom_metrics: {}
  date: 2021-10-29_03-46-04
  done: false
  episode_len_mean: 319.18
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -31.918000000000184
  episode_reward_min: -63.600000000000634
  episodes_this_iter: 4
  episodes_total: 3014
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.49015910759880854
          cur_lr: 5.000000000000001e-05
          entropy: 1.365069075425466
          entropy_coeff: 0.009999999999999998
          kl: 0.016069128978274075
          policy_loss: 0.03413747491108047
          total_loss: 1.0774927655855815
          vf_explained_var: 0.3136836290359497
          vf_loss: 1.0491295589341059
    num_agent_steps_sampled: 885000
    num_agent_steps_trained: 885000
    num_steps_sampled: 885000
    num_steps_trained: 885000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,885,23984.9,885000,-31.918,-22.7,-63.6,319.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 886000
  custom_metrics: {}
  date: 2021-10-29_03-46-31
  done: false
  episode_len_mean: 305.84
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -30.584000000000163
  episode_reward_min: -59.00000000000057
  episodes_this_iter: 4
  episodes_total: 3018
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.49015910759880854
          cur_lr: 5.000000000000001e-05
          entropy: 1.6066121406025358
          entropy_coeff: 0.009999999999999998
          kl: 0.0056692386385018756
          policy_loss: 0.030402305805020863
          total_loss: 1.21555095911026
          vf_explained_var: 0.27430903911590576
          vf_loss: 1.1984359251128303
    num_agent_steps_sampled: 886000
    num_agent_steps_trained: 886000
    num_steps_sampled: 886000
    num_steps_trained: 886000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,886,24012.2,886000,-30.584,-22.7,-59,305.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 887000
  custom_metrics: {}
  date: 2021-10-29_03-47-00
  done: false
  episode_len_mean: 301.75
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -30.175000000000153
  episode_reward_min: -59.00000000000057
  episodes_this_iter: 3
  episodes_total: 3021
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.49015910759880854
          cur_lr: 5.000000000000001e-05
          entropy: 1.0900885270701515
          entropy_coeff: 0.009999999999999998
          kl: 0.015249418447953566
          policy_loss: -0.07033926132652495
          total_loss: 0.8970503078566657
          vf_explained_var: 0.3514101803302765
          vf_loss: 0.9708158135414123
    num_agent_steps_sampled: 887000
    num_agent_steps_trained: 887000
    num_steps_sampled: 887000
    num_steps_trained: 887000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,887,24040.5,887000,-30.175,-22.7,-59,301.75


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 888000
  custom_metrics: {}
  date: 2021-10-29_03-47-28
  done: false
  episode_len_mean: 295.87
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -29.58700000000015
  episode_reward_min: -59.00000000000057
  episodes_this_iter: 4
  episodes_total: 3025
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.49015910759880854
          cur_lr: 5.000000000000001e-05
          entropy: 1.1497634099589453
          entropy_coeff: 0.009999999999999998
          kl: 0.020339174982579776
          policy_loss: 0.0009521001742945777
          total_loss: 0.6763749172290167
          vf_explained_var: 0.6265020370483398
          vf_loss: 0.6769510228600767
    num_agent_steps_sampled: 888000
    num_agent_steps_trained: 888000
    num_steps_sampled: 888000
    num_steps_trained: 888000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,888,24068.9,888000,-29.587,-22.7,-59,295.87


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 889000
  custom_metrics: {}
  date: 2021-10-29_03-47-56
  done: false
  episode_len_mean: 292.12
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -29.21200000000015
  episode_reward_min: -59.00000000000057
  episodes_this_iter: 4
  episodes_total: 3029
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.735238661398213
          cur_lr: 5.000000000000001e-05
          entropy: 1.1973454475402832
          entropy_coeff: 0.009999999999999998
          kl: 0.009520342191668894
          policy_loss: 0.014364349842071533
          total_loss: 0.9977996985117594
          vf_explained_var: 0.42811816930770874
          vf_loss: 0.9884090827571022
    num_agent_steps_sampled: 889000
    num_agent_steps_trained: 889000
    num_steps_sampled: 889000
    num_steps_trained: 889000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,889,24097.1,889000,-29.212,-22.7,-59,292.12




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 890000
  custom_metrics: {}
  date: 2021-10-29_03-48-43
  done: false
  episode_len_mean: 287.78
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -28.778000000000137
  episode_reward_min: -59.00000000000057
  episodes_this_iter: 4
  episodes_total: 3033
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.735238661398213
          cur_lr: 5.000000000000001e-05
          entropy: 1.3728625350528294
          entropy_coeff: 0.009999999999999998
          kl: 0.004725241216225918
          policy_loss: -0.023384771992762884
          total_loss: 1.0822901613182492
          vf_explained_var: 0.3540949523448944
          vf_loss: 1.11592938568857
    num_agent_steps_sampled: 890000
    num_agent_steps_trained: 890000
    num_steps_sampled: 890000
    num_steps_trained: 890000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,890,24143.7,890000,-28.778,-22.7,-59,287.78


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 891000
  custom_metrics: {}
  date: 2021-10-29_03-49-10
  done: false
  episode_len_mean: 285.48
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -28.548000000000137
  episode_reward_min: -59.00000000000057
  episodes_this_iter: 4
  episodes_total: 3037
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3676193306991065
          cur_lr: 5.000000000000001e-05
          entropy: 1.2176396773921119
          entropy_coeff: 0.009999999999999998
          kl: 0.008855186920337764
          policy_loss: -0.013501840581496557
          total_loss: 0.9761906610594855
          vf_explained_var: 0.4361158013343811
          vf_loss: 0.9986135542392731
    num_agent_steps_sampled: 891000
    num_agent_steps_trained: 891000
    num_steps_sampled: 891000
    num_steps_trained: 891000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,891,24170.9,891000,-28.548,-22.7,-59,285.48


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 892000
  custom_metrics: {}
  date: 2021-10-29_03-49-36
  done: false
  episode_len_mean: 281.16
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -28.116000000000128
  episode_reward_min: -47.3000000000004
  episodes_this_iter: 4
  episodes_total: 3041
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3676193306991065
          cur_lr: 5.000000000000001e-05
          entropy: 1.0459777116775513
          entropy_coeff: 0.009999999999999998
          kl: 0.0038314962909405874
          policy_loss: -0.01564322482380602
          total_loss: 0.815300006336636
          vf_explained_var: 0.6025713086128235
          vf_loss: 0.8399944699472851
    num_agent_steps_sampled: 892000
    num_agent_steps_trained: 892000
    num_steps_sampled: 892000
    num_steps_trained: 892000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,892,24197.2,892000,-28.116,-22.7,-47.3,281.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 893000
  custom_metrics: {}
  date: 2021-10-29_03-50-05
  done: false
  episode_len_mean: 275.83
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -27.58300000000012
  episode_reward_min: -44.40000000000036
  episodes_this_iter: 3
  episodes_total: 3044
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18380966534955326
          cur_lr: 5.000000000000001e-05
          entropy: 1.1421136922306485
          entropy_coeff: 0.009999999999999998
          kl: 0.009805042118615069
          policy_loss: -0.03598447996709082
          total_loss: 0.6741264588303036
          vf_explained_var: 0.6518458724021912
          vf_loss: 0.719729815920194
    num_agent_steps_sampled: 893000
    num_agent_steps_trained: 893000
    num_steps_sampled: 893000
    num_steps_trained: 893000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,893,24225.6,893000,-27.583,-22.7,-44.4,275.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 894000
  custom_metrics: {}
  date: 2021-10-29_03-50-31
  done: false
  episode_len_mean: 270.02
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -27.002000000000116
  episode_reward_min: -41.30000000000032
  episodes_this_iter: 4
  episodes_total: 3048
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18380966534955326
          cur_lr: 5.000000000000001e-05
          entropy: 1.0607529150115118
          entropy_coeff: 0.009999999999999998
          kl: 0.02434057561194061
          policy_loss: 0.06862627392013868
          total_loss: 0.8541577561034097
          vf_explained_var: 0.6349852085113525
          vf_loss: 0.791664974598421
    num_agent_steps_sampled: 894000
    num_agent_steps_trained: 894000
    num_steps_sampled: 894000
    num_steps_trained: 894000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,894,24251.3,894000,-27.002,-22.7,-41.3,270.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 895000
  custom_metrics: {}
  date: 2021-10-29_03-51-00
  done: false
  episode_len_mean: 265.01
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -26.50100000000011
  episode_reward_min: -33.1000000000002
  episodes_this_iter: 4
  episodes_total: 3052
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27571449802432985
          cur_lr: 5.000000000000001e-05
          entropy: 1.1661583920319876
          entropy_coeff: 0.009999999999999998
          kl: 0.010184524494743306
          policy_loss: 0.036060720599359934
          total_loss: 0.5176465355687672
          vf_explained_var: 0.70023113489151
          vf_loss: 0.49043937606943977
    num_agent_steps_sampled: 895000
    num_agent_steps_trained: 895000
    num_steps_sampled: 895000
    num_steps_trained: 895000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,895,24280.7,895000,-26.501,-22.7,-33.1,265.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 896000
  custom_metrics: {}
  date: 2021-10-29_03-51-26
  done: false
  episode_len_mean: 265.92
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -26.59200000000011
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 3056
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27571449802432985
          cur_lr: 5.000000000000001e-05
          entropy: 1.1509505801730686
          entropy_coeff: 0.009999999999999998
          kl: 0.01244172988134385
          policy_loss: -0.013215589026610056
          total_loss: 0.8377573053042094
          vf_explained_var: 0.5722570419311523
          vf_loss: 0.859052042166392
    num_agent_steps_sampled: 896000
    num_agent_steps_trained: 896000
    num_steps_sampled: 896000
    num_steps_trained: 896000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,896,24306.9,896000,-26.592,-22.7,-38.1,265.92




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 897000
  custom_metrics: {}
  date: 2021-10-29_03-52-12
  done: false
  episode_len_mean: 264.65
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.465000000000103
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 3060
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27571449802432985
          cur_lr: 5.000000000000001e-05
          entropy: 1.1365267150931888
          entropy_coeff: 0.009999999999999998
          kl: 0.011599737188211003
          policy_loss: 0.02857501283288002
          total_loss: 0.7873895009358723
          vf_explained_var: 0.26135870814323425
          vf_loss: 0.766981550720003
    num_agent_steps_sampled: 897000
    num_agent_steps_trained: 897000
    num_steps_sampled: 897000
    num_steps_trained: 897000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,897,24352.9,897000,-26.465,-20.7,-38.1,264.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 898000
  custom_metrics: {}
  date: 2021-10-29_03-52-31
  done: false
  episode_len_mean: 265.36
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.536000000000104
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 2
  episodes_total: 3062
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27571449802432985
          cur_lr: 5.000000000000001e-05
          entropy: 1.0337940447860294
          entropy_coeff: 0.009999999999999998
          kl: 0.013304393124064775
          policy_loss: -0.04683839273121622
          total_loss: 0.6984156192176871
          vf_explained_var: 0.38374459743499756
          vf_loss: 0.7519237349844641
    num_agent_steps_sampled: 898000
    num_agent_steps_trained: 898000
    num_steps_sampled: 898000
    num_steps_trained: 898000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,898,24371.2,898000,-26.536,-20.7,-38.1,265.36


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 899000
  custom_metrics: {}
  date: 2021-10-29_03-52-42
  done: false
  episode_len_mean: 271.35
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -27.135000000000094
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 1
  episodes_total: 3063
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27571449802432985
          cur_lr: 5.000000000000001e-05
          entropy: 0.6795197917355431
          entropy_coeff: 0.009999999999999998
          kl: 0.0034074000686955073
          policy_loss: -0.054677500741349326
          total_loss: 0.5745486651029852
          vf_explained_var: 0.11858375370502472
          vf_loss: 0.6350818903495868
    num_agent_steps_sampled: 899000
    num_agent_steps_trained: 899000
    num_steps_sampled: 899000
    num_steps_trained: 899000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,899,24382.3,899000,-27.135,-20.7,-83.2,271.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 900000
  custom_metrics: {}
  date: 2021-10-29_03-53-06
  done: false
  episode_len_mean: 277.62
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -27.762000000000107
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 4
  episodes_total: 3067
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13785724901216492
          cur_lr: 5.000000000000001e-05
          entropy: 1.2172332379553052
          entropy_coeff: 0.009999999999999998
          kl: 0.014740335257619898
          policy_loss: -0.0010011075271500482
          total_loss: 1.2070825788709851
          vf_explained_var: 0.19963344931602478
          vf_loss: 1.2182239492734273
    num_agent_steps_sampled: 900000
    num_agent_steps_trained: 900000
    num_steps_sampled: 900000
    num_steps_trained: 900000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,900,24406.2,900000,-27.762,-20.7,-83.2,277.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 901000
  custom_metrics: {}
  date: 2021-10-29_03-53-36
  done: false
  episode_len_mean: 276.6
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -27.660000000000096
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 4
  episodes_total: 3071
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13785724901216492
          cur_lr: 5.000000000000001e-05
          entropy: 1.1882003294097052
          entropy_coeff: 0.009999999999999998
          kl: 0.006799753073915346
          policy_loss: 0.04291214272379875
          total_loss: 0.9417201227611965
          vf_explained_var: 0.31528255343437195
          vf_loss: 0.9097525884707769
    num_agent_steps_sampled: 901000
    num_agent_steps_trained: 901000
    num_steps_sampled: 901000
    num_steps_trained: 901000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,901,24436.2,901000,-27.66,-20.7,-83.2,276.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 902000
  custom_metrics: {}
  date: 2021-10-29_03-53-53
  done: false
  episode_len_mean: 278.9
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -27.8900000000001
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 2
  episodes_total: 3073
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13785724901216492
          cur_lr: 5.000000000000001e-05
          entropy: 1.0432165582974753
          entropy_coeff: 0.009999999999999998
          kl: 0.02385631077041634
          policy_loss: -0.10510096152623495
          total_loss: 0.7569343934456507
          vf_explained_var: 0.5228835344314575
          vf_loss: 0.8691787514421675
    num_agent_steps_sampled: 902000
    num_agent_steps_trained: 902000
    num_steps_sampled: 902000
    num_steps_trained: 902000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,902,24453.8,902000,-27.89,-20.7,-83.2,278.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 903000
  custom_metrics: {}
  date: 2021-10-29_03-54-11
  done: false
  episode_len_mean: 284.03
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -28.403000000000112
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 2
  episodes_total: 3075
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20678587351824745
          cur_lr: 5.000000000000001e-05
          entropy: 0.9375752475526598
          entropy_coeff: 0.009999999999999998
          kl: 0.010960977753375624
          policy_loss: 0.08508715604742369
          total_loss: 0.5972402473290761
          vf_explained_var: 0.5764766931533813
          vf_loss: 0.5192622722643945
    num_agent_steps_sampled: 903000
    num_agent_steps_trained: 903000
    num_steps_sampled: 903000
    num_steps_trained: 903000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,903,24471.7,903000,-28.403,-20.7,-83.2,284.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 904000
  custom_metrics: {}
  date: 2021-10-29_03-54-31
  done: false
  episode_len_mean: 285.1
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -28.510000000000108
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 3
  episodes_total: 3078
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20678587351824745
          cur_lr: 5.000000000000001e-05
          entropy: 1.0441848622428047
          entropy_coeff: 0.009999999999999998
          kl: 0.007984280134809769
          policy_loss: 0.02539292441474067
          total_loss: 0.7761419120762083
          vf_explained_var: 0.367110937833786
          vf_loss: 0.7595398018757502
    num_agent_steps_sampled: 904000
    num_agent_steps_trained: 904000
    num_steps_sampled: 904000
    num_steps_trained: 904000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,904,24491.6,904000,-28.51,-20.7,-83.2,285.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 905000
  custom_metrics: {}
  date: 2021-10-29_03-54-46
  done: false
  episode_len_mean: 290.63
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -29.063000000000123
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 2
  episodes_total: 3080
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20678587351824745
          cur_lr: 5.000000000000001e-05
          entropy: 0.9862538364198473
          entropy_coeff: 0.009999999999999998
          kl: 0.006285177085533731
          policy_loss: 0.13161549535062578
          total_loss: 0.654327098859681
          vf_explained_var: 0.3815153241157532
          vf_loss: 0.5312744501357277
    num_agent_steps_sampled: 905000
    num_agent_steps_trained: 905000
    num_steps_sampled: 905000
    num_steps_trained: 905000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,905,24507,905000,-29.063,-20.7,-83.2,290.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 906000
  custom_metrics: {}
  date: 2021-10-29_03-55-08
  done: false
  episode_len_mean: 291.79
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -29.179000000000123
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 3
  episodes_total: 3083
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20678587351824745
          cur_lr: 5.000000000000001e-05
          entropy: 1.0754686541027494
          entropy_coeff: 0.009999999999999998
          kl: 0.008463397666365799
          policy_loss: 0.08495991445249981
          total_loss: 0.7871632142199411
          vf_explained_var: 0.3530968427658081
          vf_loss: 0.7112078787758946
    num_agent_steps_sampled: 906000
    num_agent_steps_trained: 906000
    num_steps_sampled: 906000
    num_steps_trained: 906000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,906,24528.6,906000,-29.179,-20.7,-83.2,291.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 907000
  custom_metrics: {}
  date: 2021-10-29_03-55-29
  done: false
  episode_len_mean: 293.36
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -29.336000000000123
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 2
  episodes_total: 3085
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20678587351824745
          cur_lr: 5.000000000000001e-05
          entropy: 1.1146618313259549
          entropy_coeff: 0.009999999999999998
          kl: 0.014641033434875795
          policy_loss: -0.05175392246908612
          total_loss: 0.9081967039240731
          vf_explained_var: -0.038596466183662415
          vf_loss: 0.9680696797246734
    num_agent_steps_sampled: 907000
    num_agent_steps_trained: 907000
    num_steps_sampled: 907000
    num_steps_trained: 907000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,907,24549.8,907000,-29.336,-20.7,-83.2,293.36


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 908000
  custom_metrics: {}
  date: 2021-10-29_03-55-58
  done: false
  episode_len_mean: 295.8
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -29.580000000000133
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 4
  episodes_total: 3089
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20678587351824745
          cur_lr: 5.000000000000001e-05
          entropy: 0.9938584162129296
          entropy_coeff: 0.009999999999999998
          kl: 0.021279155022440772
          policy_loss: -0.039484243508842255
          total_loss: 1.1418671462270948
          vf_explained_var: 0.29506900906562805
          vf_loss: 1.1868897471163007
    num_agent_steps_sampled: 908000
    num_agent_steps_trained: 908000
    num_steps_sampled: 908000
    num_steps_trained: 908000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,908,24578.1,908000,-29.58,-20.7,-83.2,295.8




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 909000
  custom_metrics: {}
  date: 2021-10-29_03-56-33
  done: false
  episode_len_mean: 298.37
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -29.837000000000124
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 3
  episodes_total: 3092
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3101788102773711
          cur_lr: 5.000000000000001e-05
          entropy: 0.7273099389341142
          entropy_coeff: 0.009999999999999998
          kl: 0.007159184371408381
          policy_loss: 0.09486734751198027
          total_loss: 0.7489987625016107
          vf_explained_var: 0.10039462149143219
          vf_loss: 0.6591838892963198
    num_agent_steps_sampled: 909000
    num_agent_steps_trained: 909000
    num_steps_sampled: 909000
    num_steps_trained: 909000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,909,24613.8,909000,-29.837,-20.7,-83.2,298.37


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 910000
  custom_metrics: {}
  date: 2021-10-29_03-56-53
  done: false
  episode_len_mean: 299.38
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -29.93800000000013
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 2
  episodes_total: 3094
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3101788102773711
          cur_lr: 5.000000000000001e-05
          entropy: 1.3665387709935506
          entropy_coeff: 0.009999999999999998
          kl: 0.013849260572080881
          policy_loss: -0.08311039639843834
          total_loss: 1.0665570525659456
          vf_explained_var: 0.2015840709209442
          vf_loss: 1.1590370919969346
    num_agent_steps_sampled: 910000
    num_agent_steps_trained: 910000
    num_steps_sampled: 910000
    num_steps_trained: 910000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,910,24632.9,910000,-29.938,-20.7,-83.2,299.38


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 911000
  custom_metrics: {}
  date: 2021-10-29_03-57-16
  done: false
  episode_len_mean: 303.54
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -30.354000000000134
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 3
  episodes_total: 3097
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3101788102773711
          cur_lr: 5.000000000000001e-05
          entropy: 1.0748315645588769
          entropy_coeff: 0.009999999999999998
          kl: 0.01036216697259778
          policy_loss: 0.038713587820529936
          total_loss: 1.2040910972489252
          vf_explained_var: 0.2230684906244278
          vf_loss: 1.172911712196138
    num_agent_steps_sampled: 911000
    num_agent_steps_trained: 911000
    num_steps_sampled: 911000
    num_steps_trained: 911000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,911,24656.8,911000,-30.354,-20.7,-83.2,303.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 912000
  custom_metrics: {}
  date: 2021-10-29_03-57-46
  done: false
  episode_len_mean: 303.11
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -30.311000000000128
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 4
  episodes_total: 3101
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3101788102773711
          cur_lr: 5.000000000000001e-05
          entropy: 0.9272885620594025
          entropy_coeff: 0.009999999999999998
          kl: 0.003217512534262923
          policy_loss: -0.055006158144937624
          total_loss: 1.3479817191759744
          vf_explained_var: 0.2630334198474884
          vf_loss: 1.411262755923801
    num_agent_steps_sampled: 912000
    num_agent_steps_trained: 912000
    num_steps_sampled: 912000
    num_steps_trained: 912000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,912,24686.1,912000,-30.311,-20.7,-83.2,303.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 913000
  custom_metrics: {}
  date: 2021-10-29_03-58-02
  done: false
  episode_len_mean: 308.3
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -30.830000000000144
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 2
  episodes_total: 3103
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15508940513868555
          cur_lr: 5.000000000000001e-05
          entropy: 1.090979414847162
          entropy_coeff: 0.009999999999999998
          kl: 0.008513505467636406
          policy_loss: 0.07208196537362205
          total_loss: 0.6500723188122114
          vf_explained_var: 0.07301989942789078
          vf_loss: 0.587579784480234
    num_agent_steps_sampled: 913000
    num_agent_steps_trained: 913000
    num_steps_sampled: 913000
    num_steps_trained: 913000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,913,24701.9,913000,-30.83,-20.7,-83.2,308.3


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 914000
  custom_metrics: {}
  date: 2021-10-29_03-58-27
  done: false
  episode_len_mean: 309.64
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -30.96400000000014
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 3
  episodes_total: 3106
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15508940513868555
          cur_lr: 5.000000000000001e-05
          entropy: 0.9933831168545617
          entropy_coeff: 0.009999999999999998
          kl: 0.012003702165454181
          policy_loss: -0.03099269304010603
          total_loss: 1.0330112583107418
          vf_explained_var: -0.051141560077667236
          vf_loss: 1.072076129830546
    num_agent_steps_sampled: 914000
    num_agent_steps_trained: 914000
    num_steps_sampled: 914000
    num_steps_trained: 914000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,914,24727.4,914000,-30.964,-20.7,-83.2,309.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 915000
  custom_metrics: {}
  date: 2021-10-29_03-58-57
  done: false
  episode_len_mean: 308.28
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -30.828000000000138
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 5
  episodes_total: 3111
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15508940513868555
          cur_lr: 5.000000000000001e-05
          entropy: 0.8178966701030731
          entropy_coeff: 0.009999999999999998
          kl: 0.011870121919395791
          policy_loss: -0.0020175408985879687
          total_loss: 1.4232999404271445
          vf_explained_var: 0.4651201069355011
          vf_loss: 1.4316555122534433
    num_agent_steps_sampled: 915000
    num_agent_steps_trained: 915000
    num_steps_sampled: 915000
    num_steps_trained: 915000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,915,24757.4,915000,-30.828,-20.7,-83.2,308.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 916000
  custom_metrics: {}
  date: 2021-10-29_03-59-28
  done: false
  episode_len_mean: 307.54
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -30.754000000000136
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 4
  episodes_total: 3115
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15508940513868555
          cur_lr: 5.000000000000001e-05
          entropy: 0.9291372835636139
          entropy_coeff: 0.009999999999999998
          kl: 0.014321155852565524
          policy_loss: 0.023045665025711058
          total_loss: 0.9601118852694829
          vf_explained_var: 0.475375235080719
          vf_loss: 0.9441365347968207
    num_agent_steps_sampled: 916000
    num_agent_steps_trained: 916000
    num_steps_sampled: 916000
    num_steps_trained: 916000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,916,24787.8,916000,-30.754,-20.7,-83.2,307.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 917000
  custom_metrics: {}
  date: 2021-10-29_03-59-55
  done: false
  episode_len_mean: 306.45
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -30.645000000000138
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 4
  episodes_total: 3119
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15508940513868555
          cur_lr: 5.000000000000001e-05
          entropy: 0.8192450867758857
          entropy_coeff: 0.009999999999999998
          kl: 0.0035330952089736123
          policy_loss: -0.003147933011253675
          total_loss: 1.2779346346855163
          vf_explained_var: 0.25757595896720886
          vf_loss: 1.2887270781728957
    num_agent_steps_sampled: 917000
    num_agent_steps_trained: 917000
    num_steps_sampled: 917000
    num_steps_trained: 917000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,917,24815,917000,-30.645,-20.7,-83.2,306.45




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 918000
  custom_metrics: {}
  date: 2021-10-29_04-00-28
  done: false
  episode_len_mean: 310.26
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -31.02600000000014
  episode_reward_min: -83.19999999999955
  episodes_this_iter: 2
  episodes_total: 3121
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07754470256934277
          cur_lr: 5.000000000000001e-05
          entropy: 0.9088925686147478
          entropy_coeff: 0.009999999999999998
          kl: 0.02850043924291666
          policy_loss: 0.06875129068891207
          total_loss: 0.5335856295294232
          vf_explained_var: 0.6357801556587219
          vf_loss: 0.47171319764521386
    num_agent_steps_sampled: 918000
    num_agent_steps_trained: 918000
    num_steps_sampled: 918000
    num_steps_trained: 918000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,918,24848,918000,-31.026,-20.7,-83.2,310.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 919000
  custom_metrics: {}
  date: 2021-10-29_04-00-41
  done: false
  episode_len_mean: 316.69
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -31.66900000000013
  episode_reward_min: -89.3999999999992
  episodes_this_iter: 1
  episodes_total: 3122
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11631705385401418
          cur_lr: 5.000000000000001e-05
          entropy: 0.38974733518229593
          entropy_coeff: 0.009999999999999998
          kl: 0.0023897703547687697
          policy_loss: 0.02563649813334147
          total_loss: 0.5067159599728055
          vf_explained_var: 0.3292114734649658
          vf_loss: 0.4846989728199939
    num_agent_steps_sampled: 919000
    num_agent_steps_trained: 919000
    num_steps_sampled: 919000
    num_steps_trained: 919000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,919,24861.7,919000,-31.669,-20.7,-89.4,316.69


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 920000
  custom_metrics: {}
  date: 2021-10-29_04-01-03
  done: false
  episode_len_mean: 320.61
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -32.06100000000014
  episode_reward_min: -89.3999999999992
  episodes_this_iter: 3
  episodes_total: 3125
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 1.207882527510325
          entropy_coeff: 0.009999999999999998
          kl: 0.010061128010476574
          policy_loss: 0.05428188393513362
          total_loss: 1.1122384263409508
          vf_explained_var: 0.23823578655719757
          vf_loss: 1.0694502232389318
    num_agent_steps_sampled: 920000
    num_agent_steps_trained: 920000
    num_steps_sampled: 920000
    num_steps_trained: 920000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,920,24882.7,920000,-32.061,-20.7,-89.4,320.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 921000
  custom_metrics: {}
  date: 2021-10-29_04-01-15
  done: false
  episode_len_mean: 321.03
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -32.103000000000144
  episode_reward_min: -89.3999999999992
  episodes_this_iter: 1
  episodes_total: 3126
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05815852692700709
          cur_lr: 5.000000000000001e-05
          entropy: 0.6417184592949019
          entropy_coeff: 0.009999999999999998
          kl: 0.0888548833047214
          policy_loss: -0.061292015430000096
          total_loss: 0.5769914087322023
          vf_explained_var: 0.3567006289958954
          vf_loss: 0.639532940586408
    num_agent_steps_sampled: 921000
    num_agent_steps_trained: 921000
    num_steps_sampled: 921000
    num_steps_trained: 921000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,921,24895.2,921000,-32.103,-20.7,-89.4,321.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 922000
  custom_metrics: {}
  date: 2021-10-29_04-01-27
  done: false
  episode_len_mean: 333.55
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -33.355000000000125
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3128
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0872377903905106
          cur_lr: 5.000000000000001e-05
          entropy: 0.48106571634610495
          entropy_coeff: 0.009999999999999998
          kl: 0.00480639148772484
          policy_loss: 0.11385802576939265
          total_loss: 0.7031560765372382
          vf_explained_var: 0.3558434844017029
          vf_loss: 0.5936894129133887
    num_agent_steps_sampled: 922000
    num_agent_steps_trained: 922000
    num_steps_sampled: 922000
    num_steps_trained: 922000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,922,24906.9,922000,-33.355,-20.7,-92.1,333.55


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 923000
  custom_metrics: {}
  date: 2021-10-29_04-01-43
  done: false
  episode_len_mean: 338.39
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -33.839000000000134
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3130
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0436188951952553
          cur_lr: 5.000000000000001e-05
          entropy: 1.252030685212877
          entropy_coeff: 0.009999999999999998
          kl: 0.046708688702768064
          policy_loss: 0.11887130604849921
          total_loss: 0.27940589963561957
          vf_explained_var: 0.45973068475723267
          vf_loss: 0.17101751737193102
    num_agent_steps_sampled: 923000
    num_agent_steps_trained: 923000
    num_steps_sampled: 923000
    num_steps_trained: 923000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,923,24923.1,923000,-33.839,-20.7,-92.1,338.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 924000
  custom_metrics: {}
  date: 2021-10-29_04-02-04
  done: false
  episode_len_mean: 341.91
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -34.19100000000014
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 3
  episodes_total: 3133
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.065428342792883
          cur_lr: 5.000000000000001e-05
          entropy: 1.017539103825887
          entropy_coeff: 0.009999999999999998
          kl: 0.06329821285444331
          policy_loss: -0.04309001215216186
          total_loss: 0.5321195386764076
          vf_explained_var: 0.08807634562253952
          vf_loss: 0.5812434608323707
    num_agent_steps_sampled: 924000
    num_agent_steps_trained: 924000
    num_steps_sampled: 924000
    num_steps_trained: 924000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,924,24944.5,924000,-34.191,-20.7,-92.1,341.91


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 925000
  custom_metrics: {}
  date: 2021-10-29_04-02-26
  done: false
  episode_len_mean: 341.56
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -34.156000000000134
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3135
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0981425141893245
          cur_lr: 5.000000000000001e-05
          entropy: 1.001033447848426
          entropy_coeff: 0.009999999999999998
          kl: 0.01839842182734104
          policy_loss: 0.0010221972233719295
          total_loss: 0.375987217326959
          vf_explained_var: 0.7209179997444153
          vf_loss: 0.3831696844763226
    num_agent_steps_sampled: 925000
    num_agent_steps_trained: 925000
    num_steps_sampled: 925000
    num_steps_trained: 925000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,925,24966.5,925000,-34.156,-20.7,-92.1,341.56


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 926000
  custom_metrics: {}
  date: 2021-10-29_04-02-47
  done: false
  episode_len_mean: 347.54
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -34.75400000000014
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 3
  episodes_total: 3138
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0981425141893245
          cur_lr: 5.000000000000001e-05
          entropy: 1.1318589654233722
          entropy_coeff: 0.009999999999999998
          kl: 0.02892053271244371
          policy_loss: 0.07254892289638519
          total_loss: 0.9791398086481624
          vf_explained_var: -0.26495951414108276
          vf_loss: 0.9150711534337865
    num_agent_steps_sampled: 926000
    num_agent_steps_trained: 926000
    num_steps_sampled: 926000
    num_steps_trained: 926000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,926,24986.7,926000,-34.754,-20.7,-92.1,347.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 927000
  custom_metrics: {}
  date: 2021-10-29_04-03-14
  done: false
  episode_len_mean: 345.85
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -34.58500000000014
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 4
  episodes_total: 3142
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 1.0358298480510713
          entropy_coeff: 0.009999999999999998
          kl: 0.012749216229822835
          policy_loss: 0.03922683439320988
          total_loss: 0.5119426435894436
          vf_explained_var: 0.7284471988677979
          vf_loss: 0.48119724426004623
    num_agent_steps_sampled: 927000
    num_agent_steps_trained: 927000
    num_steps_sampled: 927000
    num_steps_trained: 927000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,927,25014.2,927000,-34.585,-20.7,-92.1,345.85


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 928000
  custom_metrics: {}
  date: 2021-10-29_04-03-32
  done: false
  episode_len_mean: 348.7
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -34.87000000000015
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3144
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 0.981767307387458
          entropy_coeff: 0.009999999999999998
          kl: 0.009807972514952388
          policy_loss: -0.00696769654750824
          total_loss: 0.3968846079376009
          vf_explained_var: 0.5634840130805969
          vf_loss: 0.4122261108830571
    num_agent_steps_sampled: 928000
    num_agent_steps_trained: 928000
    num_steps_sampled: 928000
    num_steps_trained: 928000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,928,25031.8,928000,-34.87,-20.7,-92.1,348.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 929000
  custom_metrics: {}
  date: 2021-10-29_04-03-47
  done: false
  episode_len_mean: 356.33
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -35.633000000000166
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3146
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 0.8394144988722272
          entropy_coeff: 0.009999999999999998
          kl: 0.012982292385290368
          policy_loss: 0.1220887306663725
          total_loss: 0.7861999349461661
          vf_explained_var: -0.22951507568359375
          vf_loss: 0.6705941780366831
    num_agent_steps_sampled: 929000
    num_agent_steps_trained: 929000
    num_steps_sampled: 929000
    num_steps_trained: 929000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,929,25046.7,929000,-35.633,-20.7,-92.1,356.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 930000
  custom_metrics: {}
  date: 2021-10-29_04-04-07
  done: false
  episode_len_mean: 355.1
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -35.510000000000154
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3148
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 0.9945278657807244
          entropy_coeff: 0.009999999999999998
          kl: 0.008616883355624811
          policy_loss: -0.028217971469793053
          total_loss: 0.3440973252782391
          vf_explained_var: 0.758875846862793
          vf_loss: 0.3809920457010675
    num_agent_steps_sampled: 930000
    num_agent_steps_trained: 930000
    num_steps_sampled: 930000
    num_steps_trained: 930000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,930,25066.6,930000,-35.51,-20.7,-92.1,355.1




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 931000
  custom_metrics: {}
  date: 2021-10-29_04-04-44
  done: false
  episode_len_mean: 358.92
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -35.89200000000016
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 3
  episodes_total: 3151
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 0.7807020760244794
          entropy_coeff: 0.009999999999999998
          kl: 0.013549901498341576
          policy_loss: -0.019114038596550625
          total_loss: 0.9153412682314713
          vf_explained_var: 0.24036753177642822
          vf_loss: 0.9402676065659358
    num_agent_steps_sampled: 931000
    num_agent_steps_trained: 931000
    num_steps_sampled: 931000
    num_steps_trained: 931000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,931,25104.4,931000,-35.892,-20.7,-92.1,358.92


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 932000
  custom_metrics: {}
  date: 2021-10-29_04-04-55
  done: false
  episode_len_mean: 365.45
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -36.54500000000015
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 1
  episodes_total: 3152
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14721377128398672
          cur_lr: 5.000000000000001e-05
          entropy: 0.424141514632437
          entropy_coeff: 0.009999999999999998
          kl: 0.0024819378254280957
          policy_loss: -0.15155856274068355
          total_loss: 0.5967645977934202
          vf_explained_var: 0.36843714118003845
          vf_loss: 0.7521991843564643
    num_agent_steps_sampled: 932000
    num_agent_steps_trained: 932000
    num_steps_sampled: 932000
    num_steps_trained: 932000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,932,25115,932000,-36.545,-20.7,-92.1,365.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 933000
  custom_metrics: {}
  date: 2021-10-29_04-05-10
  done: false
  episode_len_mean: 372.15
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -37.21500000000015
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3154
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07360688564199336
          cur_lr: 5.000000000000001e-05
          entropy: 0.7317772659990522
          entropy_coeff: 0.009999999999999998
          kl: 0.011001449857514858
          policy_loss: -0.0944124730096923
          total_loss: 0.7709845342569881
          vf_explained_var: 0.14619378745555878
          vf_loss: 0.8719049990177155
    num_agent_steps_sampled: 933000
    num_agent_steps_trained: 933000
    num_steps_sampled: 933000
    num_steps_trained: 933000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,933,25130.4,933000,-37.215,-20.7,-92.1,372.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 934000
  custom_metrics: {}
  date: 2021-10-29_04-05-31
  done: false
  episode_len_mean: 377.14
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -37.71400000000014
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 3
  episodes_total: 3157
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07360688564199336
          cur_lr: 5.000000000000001e-05
          entropy: 0.5981405617462264
          entropy_coeff: 0.009999999999999998
          kl: 0.008269621339344566
          policy_loss: 0.0879654230342971
          total_loss: 0.7462391780482398
          vf_explained_var: 0.3038182258605957
          vf_loss: 0.6636464580686556
    num_agent_steps_sampled: 934000
    num_agent_steps_trained: 934000
    num_steps_sampled: 934000
    num_steps_trained: 934000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,934,25151.2,934000,-37.714,-20.7,-92.1,377.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 935000
  custom_metrics: {}
  date: 2021-10-29_04-05-50
  done: false
  episode_len_mean: 377.61
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -37.76100000000013
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3159
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07360688564199336
          cur_lr: 5.000000000000001e-05
          entropy: 0.8339071654611163
          entropy_coeff: 0.009999999999999998
          kl: 0.019708767176997218
          policy_loss: -0.047606489227877725
          total_loss: 0.5257220187120968
          vf_explained_var: 0.6029128432273865
          vf_loss: 0.5802168763346143
    num_agent_steps_sampled: 935000
    num_agent_steps_trained: 935000
    num_steps_sampled: 935000
    num_steps_trained: 935000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,935,25169.5,935000,-37.761,-20.7,-92.1,377.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 936000
  custom_metrics: {}
  date: 2021-10-29_04-06-11
  done: false
  episode_len_mean: 383.89
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -38.38900000000013
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 3
  episodes_total: 3162
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07360688564199336
          cur_lr: 5.000000000000001e-05
          entropy: 0.8319114926788542
          entropy_coeff: 0.009999999999999998
          kl: 0.009446010141574056
          policy_loss: 0.010146569377846188
          total_loss: 1.033676039510303
          vf_explained_var: 0.02168453484773636
          vf_loss: 1.0311532905532255
    num_agent_steps_sampled: 936000
    num_agent_steps_trained: 936000
    num_steps_sampled: 936000
    num_steps_trained: 936000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,936,25190.8,936000,-38.389,-20.9,-92.1,383.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 937000
  custom_metrics: {}
  date: 2021-10-29_04-06-37
  done: false
  episode_len_mean: 373.82
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -37.38200000000015
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 4
  episodes_total: 3166
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07360688564199336
          cur_lr: 5.000000000000001e-05
          entropy: 1.093185559908549
          entropy_coeff: 0.009999999999999998
          kl: 0.013615128727493125
          policy_loss: 0.026287111557192273
          total_loss: 0.9609354144997067
          vf_explained_var: 0.35922369360923767
          vf_loss: 0.9445779932869806
    num_agent_steps_sampled: 937000
    num_agent_steps_trained: 937000
    num_steps_sampled: 937000
    num_steps_trained: 937000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,937,25216.6,937000,-37.382,-20.9,-92.1,373.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 938000
  custom_metrics: {}
  date: 2021-10-29_04-07-04
  done: false
  episode_len_mean: 373.63
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -37.36300000000015
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 3
  episodes_total: 3169
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07360688564199336
          cur_lr: 5.000000000000001e-05
          entropy: 0.9561009075906541
          entropy_coeff: 0.009999999999999998
          kl: 0.009783474487851388
          policy_loss: -0.023045292786426013
          total_loss: 0.47665140098995634
          vf_explained_var: 0.7622259259223938
          vf_loss: 0.508537573284573
    num_agent_steps_sampled: 938000
    num_agent_steps_trained: 938000
    num_steps_sampled: 938000
    num_steps_trained: 938000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,938,25243.4,938000,-37.363,-20.9,-92.1,373.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 939000
  custom_metrics: {}
  date: 2021-10-29_04-07-26
  done: false
  episode_len_mean: 376.79
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -37.67900000000015
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 3
  episodes_total: 3172
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07360688564199336
          cur_lr: 5.000000000000001e-05
          entropy: 1.0417016916804844
          entropy_coeff: 0.009999999999999998
          kl: 0.010460885295117192
          policy_loss: 0.06969749956495232
          total_loss: 0.8427513185474608
          vf_explained_var: 0.3116053342819214
          vf_loss: 0.7827008369896147
    num_agent_steps_sampled: 939000
    num_agent_steps_trained: 939000
    num_steps_sampled: 939000
    num_steps_trained: 939000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,939,25265.3,939000,-37.679,-20.9,-92.1,376.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 940000
  custom_metrics: {}
  date: 2021-10-29_04-07-39
  done: false
  episode_len_mean: 376.89
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -37.68900000000014
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3174
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07360688564199336
          cur_lr: 5.000000000000001e-05
          entropy: 0.5035410457187228
          entropy_coeff: 0.009999999999999998
          kl: 0.003598495889306482
          policy_loss: 0.16730058209763632
          total_loss: 0.40900590154859756
          vf_explained_var: 0.6451617479324341
          vf_loss: 0.2464758563735005
    num_agent_steps_sampled: 940000
    num_agent_steps_trained: 940000
    num_steps_sampled: 940000
    num_steps_trained: 940000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,940,25278.6,940000,-37.689,-20.9,-92.1,376.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 941000
  custom_metrics: {}
  date: 2021-10-29_04-07-53
  done: false
  episode_len_mean: 373.86
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -37.38600000000013
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 1
  episodes_total: 3175
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03680344282099668
          cur_lr: 5.000000000000001e-05
          entropy: 0.5942174798912472
          entropy_coeff: 0.009999999999999998
          kl: 0.012863328202902835
          policy_loss: -0.02323947474360466
          total_loss: 0.3632642436772585
          vf_explained_var: 0.504456639289856
          vf_loss: 0.39197248513810334
    num_agent_steps_sampled: 941000
    num_agent_steps_trained: 941000
    num_steps_sampled: 941000
    num_steps_trained: 941000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,941,25293.2,941000,-37.386,-20.9,-92.1,373.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 942000
  custom_metrics: {}
  date: 2021-10-29_04-08-08
  done: false
  episode_len_mean: 379.03
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -37.90300000000012
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3177
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03680344282099668
          cur_lr: 5.000000000000001e-05
          entropy: 0.7833696752786636
          entropy_coeff: 0.009999999999999998
          kl: 0.018355405068638298
          policy_loss: -0.07911901523669561
          total_loss: 1.2673399946755832
          vf_explained_var: 0.1914006918668747
          vf_loss: 1.3536171778208679
    num_agent_steps_sampled: 942000
    num_agent_steps_trained: 942000
    num_steps_sampled: 942000
    num_steps_trained: 942000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,942,25307.5,942000,-37.903,-20.9,-92.1,379.03




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 943000
  custom_metrics: {}
  date: 2021-10-29_04-08-43
  done: false
  episode_len_mean: 380.01
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -38.00100000000012
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 3
  episodes_total: 3180
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03680344282099668
          cur_lr: 5.000000000000001e-05
          entropy: 0.8477285888459948
          entropy_coeff: 0.009999999999999998
          kl: 0.009145037237417834
          policy_loss: -0.05121579319238663
          total_loss: 1.2244956539736853
          vf_explained_var: 0.08226343989372253
          vf_loss: 1.2838521752092573
    num_agent_steps_sampled: 943000
    num_agent_steps_trained: 943000
    num_steps_sampled: 943000
    num_steps_trained: 943000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,943,25342.8,943000,-38.001,-20.9,-92.1,380.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 944000
  custom_metrics: {}
  date: 2021-10-29_04-08-56
  done: false
  episode_len_mean: 383.12
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -38.31200000000013
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 1
  episodes_total: 3181
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03680344282099668
          cur_lr: 5.000000000000001e-05
          entropy: 0.868075637684928
          entropy_coeff: 0.009999999999999998
          kl: 0.016542759597998707
          policy_loss: -0.07305085741811329
          total_loss: 0.5771739113661978
          vf_explained_var: 0.29478612542152405
          vf_loss: 0.6582967047579587
    num_agent_steps_sampled: 944000
    num_agent_steps_trained: 944000
    num_steps_sampled: 944000
    num_steps_trained: 944000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,944,25355.6,944000,-38.312,-20.9,-92.1,383.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 945000
  custom_metrics: {}
  date: 2021-10-29_04-09-12
  done: false
  episode_len_mean: 388.39
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -38.83900000000013
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3183
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03680344282099668
          cur_lr: 5.000000000000001e-05
          entropy: 0.7426327834526698
          entropy_coeff: 0.009999999999999998
          kl: 0.015722009910862353
          policy_loss: -0.08877992067072127
          total_loss: 1.134161338210106
          vf_explained_var: 0.29460012912750244
          vf_loss: 1.2297889699124627
    num_agent_steps_sampled: 945000
    num_agent_steps_trained: 945000
    num_steps_sampled: 945000
    num_steps_trained: 945000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,945,25371.9,945000,-38.839,-20.9,-92.1,388.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 946000
  custom_metrics: {}
  date: 2021-10-29_04-09-35
  done: false
  episode_len_mean: 387.39
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -38.73900000000012
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 3
  episodes_total: 3186
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03680344282099668
          cur_lr: 5.000000000000001e-05
          entropy: 0.9893953217400445
          entropy_coeff: 0.009999999999999998
          kl: 0.011004452475909608
          policy_loss: -0.021017780900001524
          total_loss: 0.9150691085391575
          vf_explained_var: -0.061587486416101456
          vf_loss: 0.945575850456953
    num_agent_steps_sampled: 946000
    num_agent_steps_trained: 946000
    num_steps_sampled: 946000
    num_steps_trained: 946000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,946,25395.1,946000,-38.739,-20.9,-92.1,387.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 947000
  custom_metrics: {}
  date: 2021-10-29_04-09-49
  done: false
  episode_len_mean: 393.01
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -39.301000000000116
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3188
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03680344282099668
          cur_lr: 5.000000000000001e-05
          entropy: 0.7039092974530325
          entropy_coeff: 0.009999999999999998
          kl: 0.03402315884094978
          policy_loss: 0.13569550265868505
          total_loss: 0.30710359497202766
          vf_explained_var: 0.5691022872924805
          vf_loss: 0.1771950173088246
    num_agent_steps_sampled: 947000
    num_agent_steps_trained: 947000
    num_steps_sampled: 947000
    num_steps_trained: 947000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,947,25409,947000,-39.301,-20.9,-92.1,393.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 948000
  custom_metrics: {}
  date: 2021-10-29_04-10-08
  done: false
  episode_len_mean: 393.53
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -39.35300000000012
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3190
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05520516423149502
          cur_lr: 5.000000000000001e-05
          entropy: 0.885036395655738
          entropy_coeff: 0.009999999999999998
          kl: 0.014798411376760572
          policy_loss: -0.05965830286343892
          total_loss: 0.8037969224982792
          vf_explained_var: 0.5474300980567932
          vf_loss: 0.8714886432099673
    num_agent_steps_sampled: 948000
    num_agent_steps_trained: 948000
    num_steps_sampled: 948000
    num_steps_trained: 948000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,948,25428,948000,-39.353,-21.2,-92.1,393.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 949000
  custom_metrics: {}
  date: 2021-10-29_04-10-25
  done: false
  episode_len_mean: 398.31
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -39.83100000000012
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3192
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05520516423149502
          cur_lr: 5.000000000000001e-05
          entropy: 0.9957004911369748
          entropy_coeff: 0.009999999999999998
          kl: 0.009002648513119422
          policy_loss: -0.0958998108903567
          total_loss: 1.161441732611921
          vf_explained_var: 0.2713967561721802
          vf_loss: 1.2668015419195096
    num_agent_steps_sampled: 949000
    num_agent_steps_trained: 949000
    num_steps_sampled: 949000
    num_steps_trained: 949000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,949,25444.6,949000,-39.831,-21.2,-92.1,398.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 950000
  custom_metrics: {}
  date: 2021-10-29_04-10-42
  done: false
  episode_len_mean: 396.57
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -39.65700000000011
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 3
  episodes_total: 3195
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05520516423149502
          cur_lr: 5.000000000000001e-05
          entropy: 0.8234187324841817
          entropy_coeff: 0.009999999999999998
          kl: 0.02501982153509772
          policy_loss: 0.035821247183614305
          total_loss: 0.9246124890115526
          vf_explained_var: 0.2750706970691681
          vf_loss: 0.8956442111068301
    num_agent_steps_sampled: 950000
    num_agent_steps_trained: 950000
    num_steps_sampled: 950000
    num_steps_trained: 950000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,950,25461.7,950000,-39.657,-21.2,-92.1,396.57


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 951000
  custom_metrics: {}
  date: 2021-10-29_04-11-00
  done: false
  episode_len_mean: 400.74
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -40.07400000000011
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3197
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 0.9617986235353682
          entropy_coeff: 0.009999999999999998
          kl: 0.007307535857085472
          policy_loss: 0.06743523445394305
          total_loss: 0.7729883707231946
          vf_explained_var: -0.25013768672943115
          vf_loss: 0.7145659898718199
    num_agent_steps_sampled: 951000
    num_agent_steps_trained: 951000
    num_steps_sampled: 951000
    num_steps_trained: 951000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,951,25479.5,951000,-40.074,-21.2,-92.1,400.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 952000
  custom_metrics: {}
  date: 2021-10-29_04-11-29
  done: false
  episode_len_mean: 401.21
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -40.121000000000116
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 4
  episodes_total: 3201
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 0.8573969152238634
          entropy_coeff: 0.009999999999999998
          kl: 0.008140898375789461
          policy_loss: 0.01796762910154131
          total_loss: 0.9350724505053626
          vf_explained_var: 0.5615185499191284
          vf_loss: 0.9250046663814121
    num_agent_steps_sampled: 952000
    num_agent_steps_trained: 952000
    num_steps_sampled: 952000
    num_steps_trained: 952000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,952,25508.1,952000,-40.121,-21.2,-92.1,401.21


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 953000
  custom_metrics: {}
  date: 2021-10-29_04-11-46
  done: false
  episode_len_mean: 400.74
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -40.0740000000001
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3203
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08280774634724253
          cur_lr: 5.000000000000001e-05
          entropy: 0.7938494059774611
          entropy_coeff: 0.009999999999999998
          kl: 0.02744552820328118
          policy_loss: 0.03350353638331095
          total_loss: 0.5783347970909543
          vf_explained_var: -0.022891413420438766
          vf_loss: 0.5504970570198364
    num_agent_steps_sampled: 953000
    num_agent_steps_trained: 953000
    num_steps_sampled: 953000
    num_steps_trained: 953000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,953,25525.1,953000,-40.074,-21.2,-92.1,400.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 954000
  custom_metrics: {}
  date: 2021-10-29_04-12-02
  done: false
  episode_len_mean: 403.49
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -40.349000000000096
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3205
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12421161952086379
          cur_lr: 5.000000000000001e-05
          entropy: 0.9658668862448798
          entropy_coeff: 0.009999999999999998
          kl: 0.034059474477388184
          policy_loss: 0.11302231268750297
          total_loss: 0.5128297319014867
          vf_explained_var: 0.3878767788410187
          vf_loss: 0.40523550187548
    num_agent_steps_sampled: 954000
    num_agent_steps_trained: 954000
    num_steps_sampled: 954000
    num_steps_trained: 954000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,954,25541.1,954000,-40.349,-21.2,-92.1,403.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 955000
  custom_metrics: {}
  date: 2021-10-29_04-12-18
  done: false
  episode_len_mean: 408.62
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -40.8620000000001
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3207
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1863174292812957
          cur_lr: 5.000000000000001e-05
          entropy: 1.1288291262255774
          entropy_coeff: 0.009999999999999998
          kl: 0.01311996752049317
          policy_loss: 0.1466490089065499
          total_loss: 0.5099530498186747
          vf_explained_var: 0.5311831831932068
          vf_loss: 0.3721478554006252
    num_agent_steps_sampled: 955000
    num_agent_steps_trained: 955000
    num_steps_sampled: 955000
    num_steps_trained: 955000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,955,25557.7,955000,-40.862,-21.2,-92.1,408.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 956000
  custom_metrics: {}
  date: 2021-10-29_04-12-38
  done: false
  episode_len_mean: 413.08
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -41.3080000000001
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3209
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1863174292812957
          cur_lr: 5.000000000000001e-05
          entropy: 1.266089177131653
          entropy_coeff: 0.009999999999999998
          kl: 0.005249247675996256
          policy_loss: 0.02116778799229198
          total_loss: 0.3288717206981447
          vf_explained_var: 0.6622551679611206
          vf_loss: 0.31938679918853774
    num_agent_steps_sampled: 956000
    num_agent_steps_trained: 956000
    num_steps_sampled: 956000
    num_steps_trained: 956000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,956,25577.2,956000,-41.308,-21.2,-92.1,413.08




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 957000
  custom_metrics: {}
  date: 2021-10-29_04-13-12
  done: false
  episode_len_mean: 413.67
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -41.367000000000104
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3211
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1863174292812957
          cur_lr: 5.000000000000001e-05
          entropy: 0.9747871855894724
          entropy_coeff: 0.009999999999999998
          kl: 0.009326180089687755
          policy_loss: -0.06106165183915032
          total_loss: 0.49102277499106195
          vf_explained_var: 0.5798534154891968
          vf_loss: 0.5600946598065396
    num_agent_steps_sampled: 957000
    num_agent_steps_trained: 957000
    num_steps_sampled: 957000
    num_steps_trained: 957000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,957,25611.5,957000,-41.367,-21.2,-92.1,413.67


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 958000
  custom_metrics: {}
  date: 2021-10-29_04-13-32
  done: false
  episode_len_mean: 419.18
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -41.9180000000001
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 3
  episodes_total: 3214
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1863174292812957
          cur_lr: 5.000000000000001e-05
          entropy: 1.1163048326969147
          entropy_coeff: 0.009999999999999998
          kl: 0.02105019767695282
          policy_loss: -0.08630898007088238
          total_loss: 1.2023106667730543
          vf_explained_var: 0.535802960395813
          vf_loss: 1.2958606971634758
    num_agent_steps_sampled: 958000
    num_agent_steps_trained: 958000
    num_steps_sampled: 958000
    num_steps_trained: 958000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,958,25631.8,958000,-41.918,-21.2,-92.1,419.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 959000
  custom_metrics: {}
  date: 2021-10-29_04-13-53
  done: false
  episode_len_mean: 423.2
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -42.32000000000011
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 3
  episodes_total: 3217
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27947614392194353
          cur_lr: 5.000000000000001e-05
          entropy: 0.9337734772099389
          entropy_coeff: 0.009999999999999998
          kl: 0.00848000932506273
          policy_loss: -0.05049192897147602
          total_loss: 0.8493275549676683
          vf_explained_var: 0.5249148011207581
          vf_loss: 0.9067872570620643
    num_agent_steps_sampled: 959000
    num_agent_steps_trained: 959000
    num_steps_sampled: 959000
    num_steps_trained: 959000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,959,25652.4,959000,-42.32,-21.2,-92.1,423.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 960000
  custom_metrics: {}
  date: 2021-10-29_04-14-10
  done: false
  episode_len_mean: 428.22
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -42.82200000000011
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 2
  episodes_total: 3219
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27947614392194353
          cur_lr: 5.000000000000001e-05
          entropy: 1.1110915892653994
          entropy_coeff: 0.009999999999999998
          kl: 0.006585869964644786
          policy_loss: -0.048304497285021675
          total_loss: 0.6823000328408347
          vf_explained_var: 0.0008871741010807455
          vf_loss: 0.7398748509378896
    num_agent_steps_sampled: 960000
    num_agent_steps_trained: 960000
    num_steps_sampled: 960000
    num_steps_trained: 960000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,960,25669.1,960000,-42.822,-21.2,-92.1,428.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 961000
  custom_metrics: {}
  date: 2021-10-29_04-14-23
  done: false
  episode_len_mean: 430.6
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -43.0600000000001
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 1
  episodes_total: 3220
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27947614392194353
          cur_lr: 5.000000000000001e-05
          entropy: 1.2374997682041593
          entropy_coeff: 0.009999999999999998
          kl: 0.020611534853775027
          policy_loss: -0.07089175615045759
          total_loss: 0.6933886236614651
          vf_explained_var: -0.6084161996841431
          vf_loss: 0.770894948600067
    num_agent_steps_sampled: 961000
    num_agent_steps_trained: 961000
    num_steps_sampled: 961000
    num_steps_trained: 961000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,961,25681.9,961000,-43.06,-21.2,-92.1,430.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 962000
  custom_metrics: {}
  date: 2021-10-29_04-14-40
  done: false
  episode_len_mean: 428.63
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -42.86300000000011
  episode_reward_min: -92.09999999999904
  episodes_this_iter: 3
  episodes_total: 3223
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41921421588291535
          cur_lr: 5.000000000000001e-05
          entropy: 1.1148349344730377
          entropy_coeff: 0.009999999999999998
          kl: 0.0097098173238897
          policy_loss: 0.07922883646355736
          total_loss: 1.170111103521453
          vf_explained_var: -0.08351621776819229
          vf_loss: 1.0979601220124298
    num_agent_steps_sampled: 962000
    num_agent_steps_trained: 962000
    num_steps_sampled: 962000
    num_steps_trained: 962000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,962,25699.1,962000,-42.863,-21.2,-92.1,428.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 963000
  custom_metrics: {}
  date: 2021-10-29_04-15-09
  done: false
  episode_len_mean: 418.81
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -41.88100000000012
  episode_reward_min: -88.69999999999924
  episodes_this_iter: 4
  episodes_total: 3227
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41921421588291535
          cur_lr: 5.000000000000001e-05
          entropy: 0.8258500595887502
          entropy_coeff: 0.009999999999999998
          kl: 0.011094972371533235
          policy_loss: -0.009863804446326362
          total_loss: 0.7459237505992253
          vf_explained_var: 0.6625474691390991
          vf_loss: 0.7593948827849494
    num_agent_steps_sampled: 963000
    num_agent_steps_trained: 963000
    num_steps_sampled: 963000
    num_steps_trained: 963000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,963,25727.9,963000,-41.881,-21.2,-88.7,418.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 964000
  custom_metrics: {}
  date: 2021-10-29_04-15-28
  done: false
  episode_len_mean: 414.88
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -41.48800000000013
  episode_reward_min: -88.69999999999924
  episodes_this_iter: 2
  episodes_total: 3229
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41921421588291535
          cur_lr: 5.000000000000001e-05
          entropy: 1.193680034081141
          entropy_coeff: 0.009999999999999998
          kl: 0.012253320632911308
          policy_loss: -0.053093757728735605
          total_loss: 0.7685391437676218
          vf_explained_var: 0.11509204655885696
          vf_loss: 0.8284329551375574
    num_agent_steps_sampled: 964000
    num_agent_steps_trained: 964000
    num_steps_sampled: 964000
    num_steps_trained: 964000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,964,25747,964000,-41.488,-21.2,-88.7,414.88


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 965000
  custom_metrics: {}
  date: 2021-10-29_04-15-52
  done: false
  episode_len_mean: 410.44
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -41.044000000000125
  episode_reward_min: -88.69999999999924
  episodes_this_iter: 3
  episodes_total: 3232
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41921421588291535
          cur_lr: 5.000000000000001e-05
          entropy: 1.0953342921204037
          entropy_coeff: 0.009999999999999998
          kl: 0.007159243746632053
          policy_loss: 0.0016150749391979642
          total_loss: 1.0745026611619526
          vf_explained_var: 0.3013681471347809
          vf_loss: 1.0808396741747857
    num_agent_steps_sampled: 965000
    num_agent_steps_trained: 965000
    num_steps_sampled: 965000
    num_steps_trained: 965000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,965,25771.2,965000,-41.044,-21.2,-88.7,410.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 966000
  custom_metrics: {}
  date: 2021-10-29_04-16-06
  done: false
  episode_len_mean: 414.8
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -41.480000000000125
  episode_reward_min: -88.69999999999924
  episodes_this_iter: 2
  episodes_total: 3234
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41921421588291535
          cur_lr: 5.000000000000001e-05
          entropy: 1.2053512321578133
          entropy_coeff: 0.009999999999999998
          kl: 0.00722620138359302
          policy_loss: -0.0655897646314568
          total_loss: 0.7629487050904168
          vf_explained_var: 0.165378600358963
          vf_loss: 0.8375626697722408
    num_agent_steps_sampled: 966000
    num_agent_steps_trained: 966000
    num_steps_sampled: 966000
    num_steps_trained: 966000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,966,25785.1,966000,-41.48,-21.2,-88.7,414.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 967000
  custom_metrics: {}
  date: 2021-10-29_04-16-20
  done: false
  episode_len_mean: 418.95
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -41.89500000000014
  episode_reward_min: -88.69999999999924
  episodes_this_iter: 2
  episodes_total: 3236
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41921421588291535
          cur_lr: 5.000000000000001e-05
          entropy: 1.3372215602133009
          entropy_coeff: 0.009999999999999998
          kl: 0.008824071893786152
          policy_loss: 0.10530220286713707
          total_loss: 0.8655601365698709
          vf_explained_var: -0.5722037553787231
          vf_loss: 0.7699309784919024
    num_agent_steps_sampled: 967000
    num_agent_steps_trained: 967000
    num_steps_sampled: 967000
    num_steps_trained: 967000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,967,25799.2,967000,-41.895,-21.2,-88.7,418.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 968000
  custom_metrics: {}
  date: 2021-10-29_04-16-42
  done: false
  episode_len_mean: 419.25
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -41.92500000000014
  episode_reward_min: -88.69999999999924
  episodes_this_iter: 2
  episodes_total: 3238
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41921421588291535
          cur_lr: 5.000000000000001e-05
          entropy: 1.3017071392801074
          entropy_coeff: 0.009999999999999998
          kl: 0.0077628638142775075
          policy_loss: 0.037949543446302414
          total_loss: 0.3082964157892598
          vf_explained_var: 0.1340710073709488
          vf_loss: 0.28010963729272287
    num_agent_steps_sampled: 968000
    num_agent_steps_trained: 968000
    num_steps_sampled: 968000
    num_steps_trained: 968000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,968,25821,968000,-41.925,-21.2,-88.7,419.25




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 969000
  custom_metrics: {}
  date: 2021-10-29_04-17-16
  done: false
  episode_len_mean: 423.83
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -42.38300000000014
  episode_reward_min: -88.69999999999924
  episodes_this_iter: 3
  episodes_total: 3241
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41921421588291535
          cur_lr: 5.000000000000001e-05
          entropy: 1.0385666383637322
          entropy_coeff: 0.009999999999999998
          kl: 0.008416777711735317
          policy_loss: 0.07045942222078641
          total_loss: 0.8380217283964158
          vf_explained_var: 0.2821676731109619
          vf_loss: 0.774419539468363
    num_agent_steps_sampled: 969000
    num_agent_steps_trained: 969000
    num_steps_sampled: 969000
    num_steps_trained: 969000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,969,25855.3,969000,-42.383,-21.2,-88.7,423.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 970000
  custom_metrics: {}
  date: 2021-10-29_04-17-32
  done: false
  episode_len_mean: 426.83
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -42.683000000000135
  episode_reward_min: -88.69999999999924
  episodes_this_iter: 1
  episodes_total: 3242
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41921421588291535
          cur_lr: 5.000000000000001e-05
          entropy: 1.3812655991978116
          entropy_coeff: 0.009999999999999998
          kl: 0.0065075789133865036
          policy_loss: -0.05719256732198927
          total_loss: 0.7642467204067442
          vf_explained_var: -0.6506554484367371
          vf_loss: 0.8325238725791375
    num_agent_steps_sampled: 970000
    num_agent_steps_trained: 970000
    num_steps_sampled: 970000
    num_steps_trained: 970000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,970,25870.6,970000,-42.683,-21.2,-88.7,426.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 971000
  custom_metrics: {}
  date: 2021-10-29_04-17-57
  done: false
  episode_len_mean: 419.15
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -41.915000000000134
  episode_reward_min: -88.69999999999924
  episodes_this_iter: 4
  episodes_total: 3246
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41921421588291535
          cur_lr: 5.000000000000001e-05
          entropy: 0.8938360280460782
          entropy_coeff: 0.009999999999999998
          kl: 0.020567241731268383
          policy_loss: -0.07107885347472297
          total_loss: 1.0979662285910712
          vf_explained_var: 0.403969407081604
          vf_loss: 1.1693613519271215
    num_agent_steps_sampled: 971000
    num_agent_steps_trained: 971000
    num_steps_sampled: 971000
    num_steps_trained: 971000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,971,25896.3,971000,-41.915,-21.2,-88.7,419.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 972000
  custom_metrics: {}
  date: 2021-10-29_04-18-21
  done: false
  episode_len_mean: 416.22
  episode_media: {}
  episode_reward_max: -21.20000000000003
  episode_reward_mean: -41.622000000000135
  episode_reward_min: -88.69999999999924
  episodes_this_iter: 3
  episodes_total: 3249
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6288213238243731
          cur_lr: 5.000000000000001e-05
          entropy: 0.9565351638529036
          entropy_coeff: 0.009999999999999998
          kl: 0.01069965988507617
          policy_loss: -0.10247412108712727
          total_loss: 0.9233309626579285
          vf_explained_var: 0.4378059506416321
          vf_loss: 1.0286422775851356
    num_agent_steps_sampled: 972000
    num_agent_steps_trained: 972000
    num_steps_sampled: 972000
    num_steps_trained: 972000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,972,25919.7,972000,-41.622,-21.2,-88.7,416.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 973000
  custom_metrics: {}
  date: 2021-10-29_04-18-47
  done: false
  episode_len_mean: 406.57
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.657000000000146
  episode_reward_min: -84.19999999999949
  episodes_this_iter: 4
  episodes_total: 3253
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6288213238243731
          cur_lr: 5.000000000000001e-05
          entropy: 0.9222459355990092
          entropy_coeff: 0.009999999999999998
          kl: 0.012263153013283044
          policy_loss: -0.014445845120482975
          total_loss: 1.0926648451222314
          vf_explained_var: 0.25350579619407654
          vf_loss: 1.10862182047632
    num_agent_steps_sampled: 973000
    num_agent_steps_trained: 973000
    num_steps_sampled: 973000
    num_steps_trained: 973000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,973,25945.8,973000,-40.657,-21.5,-84.2,406.57


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 974000
  custom_metrics: {}
  date: 2021-10-29_04-19-09
  done: false
  episode_len_mean: 402.66
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.26600000000017
  episode_reward_min: -84.19999999999949
  episodes_this_iter: 3
  episodes_total: 3256
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6288213238243731
          cur_lr: 5.000000000000001e-05
          entropy: 0.996471498409907
          entropy_coeff: 0.009999999999999998
          kl: 0.0023469546042918744
          policy_loss: 0.1050780865881178
          total_loss: 0.6283617910411623
          vf_explained_var: 0.5252404808998108
          vf_loss: 0.5317725968236725
    num_agent_steps_sampled: 974000
    num_agent_steps_trained: 974000
    num_steps_sampled: 974000
    num_steps_trained: 974000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,974,25967.8,974000,-40.266,-21.5,-84.2,402.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 975000
  custom_metrics: {}
  date: 2021-10-29_04-19-28
  done: false
  episode_len_mean: 405.05
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.50500000000017
  episode_reward_min: -84.19999999999949
  episodes_this_iter: 2
  episodes_total: 3258
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31441066191218653
          cur_lr: 5.000000000000001e-05
          entropy: 1.1554590218596987
          entropy_coeff: 0.009999999999999998
          kl: 0.01042467018114933
          policy_loss: -0.060979672604136996
          total_loss: 0.4854170557525423
          vf_explained_var: 0.1268317848443985
          vf_loss: 0.5546736874514155
    num_agent_steps_sampled: 975000
    num_agent_steps_trained: 975000
    num_steps_sampled: 975000
    num_steps_trained: 975000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,975,25986.9,975000,-40.505,-21.5,-84.2,405.05


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 976000
  custom_metrics: {}
  date: 2021-10-29_04-19-49
  done: false
  episode_len_mean: 401.63
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.163000000000174
  episode_reward_min: -84.19999999999949
  episodes_this_iter: 3
  episodes_total: 3261
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31441066191218653
          cur_lr: 5.000000000000001e-05
          entropy: 0.9881617850727505
          entropy_coeff: 0.009999999999999998
          kl: 0.012713849770363102
          policy_loss: -0.09188490352696843
          total_loss: 1.3919727524121603
          vf_explained_var: 0.0687367171049118
          vf_loss: 1.489741908179389
    num_agent_steps_sampled: 976000
    num_agent_steps_trained: 976000
    num_steps_sampled: 976000
    num_steps_trained: 976000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,976,26008.2,976000,-40.163,-21.5,-84.2,401.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 977000
  custom_metrics: {}
  date: 2021-10-29_04-20-16
  done: false
  episode_len_mean: 403.0
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.30000000000017
  episode_reward_min: -84.19999999999949
  episodes_this_iter: 4
  episodes_total: 3265
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31441066191218653
          cur_lr: 5.000000000000001e-05
          entropy: 0.8992932617664338
          entropy_coeff: 0.009999999999999998
          kl: 0.011864367289453684
          policy_loss: -0.06916273360451063
          total_loss: 0.7888732933335834
          vf_explained_var: 0.5324701070785522
          vf_loss: 0.8632986777358584
    num_agent_steps_sampled: 977000
    num_agent_steps_trained: 977000
    num_steps_sampled: 977000
    num_steps_trained: 977000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,977,26034.9,977000,-40.3,-21.5,-84.2,403


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 978000
  custom_metrics: {}
  date: 2021-10-29_04-20-36
  done: false
  episode_len_mean: 405.54
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.55400000000018
  episode_reward_min: -84.19999999999949
  episodes_this_iter: 2
  episodes_total: 3267
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31441066191218653
          cur_lr: 5.000000000000001e-05
          entropy: 1.0718208856052822
          entropy_coeff: 0.009999999999999998
          kl: 0.011159112161952854
          policy_loss: 0.0413548395037651
          total_loss: 0.5805634442302916
          vf_explained_var: 0.20186147093772888
          vf_loss: 0.5464182780848609
    num_agent_steps_sampled: 978000
    num_agent_steps_trained: 978000
    num_steps_sampled: 978000
    num_steps_trained: 978000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,978,26054.6,978000,-40.554,-21.5,-84.2,405.54




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 979000
  custom_metrics: {}
  date: 2021-10-29_04-21-13
  done: false
  episode_len_mean: 407.43
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.74300000000018
  episode_reward_min: -84.19999999999949
  episodes_this_iter: 3
  episodes_total: 3270
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31441066191218653
          cur_lr: 5.000000000000001e-05
          entropy: 1.0435410923428006
          entropy_coeff: 0.009999999999999998
          kl: 0.009321615564294521
          policy_loss: 0.008529864831103219
          total_loss: 0.9460734410418404
          vf_explained_var: 0.36584293842315674
          vf_loss: 0.9450481677427888
    num_agent_steps_sampled: 979000
    num_agent_steps_trained: 979000
    num_steps_sampled: 979000
    num_steps_trained: 979000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,979,26091.8,979000,-40.743,-21.5,-84.2,407.43


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 980000
  custom_metrics: {}
  date: 2021-10-29_04-21-31
  done: false
  episode_len_mean: 407.9
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.79000000000018
  episode_reward_min: -84.19999999999949
  episodes_this_iter: 2
  episodes_total: 3272
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31441066191218653
          cur_lr: 5.000000000000001e-05
          entropy: 0.9574668334590064
          entropy_coeff: 0.009999999999999998
          kl: 0.005408614896540382
          policy_loss: -0.045640923745102355
          total_loss: 1.1441394673453438
          vf_explained_var: -0.08315876871347427
          vf_loss: 1.1976545412507322
    num_agent_steps_sampled: 980000
    num_agent_steps_trained: 980000
    num_steps_sampled: 980000
    num_steps_trained: 980000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,980,26110.3,980000,-40.79,-21.5,-84.2,407.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 981000
  custom_metrics: {}
  date: 2021-10-29_04-21-48
  done: false
  episode_len_mean: 408.54
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.854000000000184
  episode_reward_min: -84.19999999999949
  episodes_this_iter: 2
  episodes_total: 3274
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31441066191218653
          cur_lr: 5.000000000000001e-05
          entropy: 0.943894933991962
          entropy_coeff: 0.009999999999999998
          kl: 0.0048609978630134755
          policy_loss: 0.0043303146958351135
          total_loss: 0.824102618959215
          vf_explained_var: -0.3367585241794586
          vf_loss: 0.8276829140053855
    num_agent_steps_sampled: 981000
    num_agent_steps_trained: 981000
    num_steps_sampled: 981000
    num_steps_trained: 981000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,981,26126.7,981000,-40.854,-21.5,-84.2,408.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 982000
  custom_metrics: {}
  date: 2021-10-29_04-22-09
  done: false
  episode_len_mean: 404.34
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.4340000000002
  episode_reward_min: -82.2999999999996
  episodes_this_iter: 3
  episodes_total: 3277
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15720533095609326
          cur_lr: 5.000000000000001e-05
          entropy: 0.9333403050899506
          entropy_coeff: 0.009999999999999998
          kl: 0.005285946558106818
          policy_loss: 0.038200742254654564
          total_loss: 1.3315240098370447
          vf_explained_var: -0.03072217106819153
          vf_loss: 1.3018256713118819
    num_agent_steps_sampled: 982000
    num_agent_steps_trained: 982000
    num_steps_sampled: 982000
    num_steps_trained: 982000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,982,26147.9,982000,-40.434,-21.5,-82.3,404.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 983000
  custom_metrics: {}
  date: 2021-10-29_04-22-26
  done: false
  episode_len_mean: 399.11
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -39.9110000000002
  episode_reward_min: -82.2999999999996
  episodes_this_iter: 2
  episodes_total: 3279
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15720533095609326
          cur_lr: 5.000000000000001e-05
          entropy: 0.9457154280609554
          entropy_coeff: 0.009999999999999998
          kl: 0.010172078444552212
          policy_loss: -0.07411923540963067
          total_loss: 1.1216366304291618
          vf_explained_var: -0.2085471749305725
          vf_loss: 1.203613889714082
    num_agent_steps_sampled: 983000
    num_agent_steps_trained: 983000
    num_steps_sampled: 983000
    num_steps_trained: 983000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,983,26164.5,983000,-39.911,-21.5,-82.3,399.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 984000
  custom_metrics: {}
  date: 2021-10-29_04-22-41
  done: false
  episode_len_mean: 402.36
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.236000000000196
  episode_reward_min: -82.2999999999996
  episodes_this_iter: 2
  episodes_total: 3281
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15720533095609326
          cur_lr: 5.000000000000001e-05
          entropy: 0.8578915754954021
          entropy_coeff: 0.009999999999999998
          kl: 0.008402591378607679
          policy_loss: -0.07105135685867733
          total_loss: 1.3165756324927012
          vf_explained_var: -0.29143601655960083
          vf_loss: 1.3948849639130962
    num_agent_steps_sampled: 984000
    num_agent_steps_trained: 984000
    num_steps_sampled: 984000
    num_steps_trained: 984000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,984,26179.9,984000,-40.236,-21.5,-82.3,402.36


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 985000
  custom_metrics: {}
  date: 2021-10-29_04-22-55
  done: false
  episode_len_mean: 402.62
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.262000000000214
  episode_reward_min: -78.89999999999979
  episodes_this_iter: 2
  episodes_total: 3283
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15720533095609326
          cur_lr: 5.000000000000001e-05
          entropy: 0.9195446928342184
          entropy_coeff: 0.009999999999999998
          kl: 0.0033757830080696116
          policy_loss: 0.09750114430983861
          total_loss: 0.8039153807693058
          vf_explained_var: -0.5087155103683472
          vf_loss: 0.7150789938349691
    num_agent_steps_sampled: 985000
    num_agent_steps_trained: 985000
    num_steps_sampled: 985000
    num_steps_trained: 985000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,985,26193.3,985000,-40.262,-21.5,-78.9,402.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 986000
  custom_metrics: {}
  date: 2021-10-29_04-23-12
  done: false
  episode_len_mean: 404.7
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.47000000000022
  episode_reward_min: -78.89999999999979
  episodes_this_iter: 2
  episodes_total: 3285
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07860266547804663
          cur_lr: 5.000000000000001e-05
          entropy: 0.9354349619812435
          entropy_coeff: 0.009999999999999998
          kl: 0.007991383527783964
          policy_loss: 0.1294487212267187
          total_loss: 0.4406519755721092
          vf_explained_var: -0.1725132316350937
          vf_loss: 0.3199294595254792
    num_agent_steps_sampled: 986000
    num_agent_steps_trained: 986000
    num_steps_sampled: 986000
    num_steps_trained: 986000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,986,26210.7,986000,-40.47,-21.5,-78.9,404.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 987000
  custom_metrics: {}
  date: 2021-10-29_04-23-32
  done: false
  episode_len_mean: 405.82
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.58200000000022
  episode_reward_min: -78.89999999999979
  episodes_this_iter: 2
  episodes_total: 3287
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07860266547804663
          cur_lr: 5.000000000000001e-05
          entropy: 0.9651550783051385
          entropy_coeff: 0.009999999999999998
          kl: 0.004162173699134206
          policy_loss: -0.07504313869608772
          total_loss: 1.232218414876196
          vf_explained_var: -0.27289626002311707
          vf_loss: 1.3165859415299364
    num_agent_steps_sampled: 987000
    num_agent_steps_trained: 987000
    num_steps_sampled: 987000
    num_steps_trained: 987000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,987,26230.3,987000,-40.582,-21.5,-78.9,405.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 988000
  custom_metrics: {}
  date: 2021-10-29_04-23-46
  done: false
  episode_len_mean: 405.23
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.52300000000022
  episode_reward_min: -76.29999999999994
  episodes_this_iter: 2
  episodes_total: 3289
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.039301332739023316
          cur_lr: 5.000000000000001e-05
          entropy: 0.9400112589200338
          entropy_coeff: 0.009999999999999998
          kl: 0.011838230341047673
          policy_loss: -0.07559651119841469
          total_loss: 1.3107712325122622
          vf_explained_var: 0.208815336227417
          vf_loss: 1.3953025920316577
    num_agent_steps_sampled: 988000
    num_agent_steps_trained: 988000
    num_steps_sampled: 988000
    num_steps_trained: 988000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,988,26245,988000,-40.523,-21.5,-76.3,405.23


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 989000
  custom_metrics: {}
  date: 2021-10-29_04-24-01
  done: false
  episode_len_mean: 406.84
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.68400000000023
  episode_reward_min: -76.29999999999994
  episodes_this_iter: 2
  episodes_total: 3291
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.039301332739023316
          cur_lr: 5.000000000000001e-05
          entropy: 0.8452915675110287
          entropy_coeff: 0.009999999999999998
          kl: 0.007621957989301996
          policy_loss: -0.030248661670419903
          total_loss: 1.1570118854443232
          vf_explained_var: -0.31612247228622437
          vf_loss: 1.195413887169626
    num_agent_steps_sampled: 989000
    num_agent_steps_trained: 989000
    num_steps_sampled: 989000
    num_steps_trained: 989000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,989,26259.6,989000,-40.684,-21.5,-76.3,406.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 990000
  custom_metrics: {}
  date: 2021-10-29_04-24-18
  done: false
  episode_len_mean: 406.72
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.67200000000023
  episode_reward_min: -76.29999999999994
  episodes_this_iter: 2
  episodes_total: 3293
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.039301332739023316
          cur_lr: 5.000000000000001e-05
          entropy: 0.7213788045777215
          entropy_coeff: 0.009999999999999998
          kl: 0.031419742528251186
          policy_loss: -0.05869339406490326
          total_loss: 1.1673658708731334
          vf_explained_var: -0.13350054621696472
          vf_loss: 1.2320382023644116
    num_agent_steps_sampled: 990000
    num_agent_steps_trained: 990000
    num_steps_sampled: 990000
    num_steps_trained: 990000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,990,26276.6,990000,-40.672,-21.5,-76.3,406.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 991000
  custom_metrics: {}
  date: 2021-10-29_04-24-37
  done: false
  episode_len_mean: 403.17
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.31700000000024
  episode_reward_min: -76.29999999999994
  episodes_this_iter: 3
  episodes_total: 3296
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.058951999108534985
          cur_lr: 5.000000000000001e-05
          entropy: 0.8798411210378011
          entropy_coeff: 0.009999999999999998
          kl: 0.014382502728451542
          policy_loss: 0.05020857668585248
          total_loss: 1.2556447363562053
          vf_explained_var: -0.03506920859217644
          vf_loss: 1.213386679854658
    num_agent_steps_sampled: 991000
    num_agent_steps_trained: 991000
    num_steps_sampled: 991000
    num_steps_trained: 991000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,991,26296.1,991000,-40.317,-21.5,-76.3,403.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 992000
  custom_metrics: {}
  date: 2021-10-29_04-25-04
  done: false
  episode_len_mean: 404.28
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.428000000000246
  episode_reward_min: -76.29999999999994
  episodes_this_iter: 3
  episodes_total: 3299
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.058951999108534985
          cur_lr: 5.000000000000001e-05
          entropy: 0.9716079029772017
          entropy_coeff: 0.009999999999999998
          kl: 0.014473459565978617
          policy_loss: 0.03338624089956284
          total_loss: 1.1599986576371724
          vf_explained_var: 0.05677933245897293
          vf_loss: 1.1354752669731776
    num_agent_steps_sampled: 992000
    num_agent_steps_trained: 992000
    num_steps_sampled: 992000
    num_steps_trained: 992000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,992,26322.3,992000,-40.428,-21.5,-76.3,404.28




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 993000
  custom_metrics: {}
  date: 2021-10-29_04-25-41
  done: false
  episode_len_mean: 402.6
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -40.26000000000025
  episode_reward_min: -76.29999999999994
  episodes_this_iter: 3
  episodes_total: 3302
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.058951999108534985
          cur_lr: 5.000000000000001e-05
          entropy: 0.8976624952422247
          entropy_coeff: 0.009999999999999998
          kl: 0.012311664705165508
          policy_loss: 0.04697498679161072
          total_loss: 1.1297191126479043
          vf_explained_var: 0.25255000591278076
          vf_loss: 1.0909949504770338
    num_agent_steps_sampled: 993000
    num_agent_steps_trained: 993000
    num_steps_sampled: 993000
    num_steps_trained: 993000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,993,26359.4,993000,-40.26,-21.5,-76.3,402.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 994000
  custom_metrics: {}
  date: 2021-10-29_04-26-05
  done: false
  episode_len_mean: 399.75
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -39.97500000000026
  episode_reward_min: -75.59999999999998
  episodes_this_iter: 3
  episodes_total: 3305
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.058951999108534985
          cur_lr: 5.000000000000001e-05
          entropy: 0.9859356429841784
          entropy_coeff: 0.009999999999999998
          kl: 0.018224116051048452
          policy_loss: 0.026087168272998597
          total_loss: 1.1287943270471361
          vf_explained_var: -0.11669451743364334
          vf_loss: 1.111492157396343
    num_agent_steps_sampled: 994000
    num_agent_steps_trained: 994000
    num_steps_sampled: 994000
    num_steps_trained: 994000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,994,26383.5,994000,-39.975,-21.5,-75.6,399.75


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 995000
  custom_metrics: {}
  date: 2021-10-29_04-26-25
  done: false
  episode_len_mean: 398.31
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -39.83100000000026
  episode_reward_min: -75.59999999999998
  episodes_this_iter: 2
  episodes_total: 3307
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.058951999108534985
          cur_lr: 5.000000000000001e-05
          entropy: 1.0525053964720832
          entropy_coeff: 0.009999999999999998
          kl: 0.0166865898328813
          policy_loss: -0.07907563207878007
          total_loss: 0.8695100148518881
          vf_explained_var: 0.3077717125415802
          vf_loss: 0.9581269873513116
    num_agent_steps_sampled: 995000
    num_agent_steps_trained: 995000
    num_steps_sampled: 995000
    num_steps_trained: 995000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,995,26403.3,995000,-39.831,-21.5,-75.6,398.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 996000
  custom_metrics: {}
  date: 2021-10-29_04-26-50
  done: false
  episode_len_mean: 395.04
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -39.50400000000026
  episode_reward_min: -75.59999999999998
  episodes_this_iter: 4
  episodes_total: 3311
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.058951999108534985
          cur_lr: 5.000000000000001e-05
          entropy: 1.2033666359053719
          entropy_coeff: 0.009999999999999998
          kl: 0.03310475857514514
          policy_loss: 0.04632964713705911
          total_loss: 1.227327745490604
          vf_explained_var: 0.49150314927101135
          vf_loss: 1.1910801647437943
    num_agent_steps_sampled: 996000
    num_agent_steps_trained: 996000
    num_steps_sampled: 996000
    num_steps_trained: 996000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,996,26428.8,996000,-39.504,-23.9,-75.6,395.04


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 997000
  custom_metrics: {}
  date: 2021-10-29_04-27-17
  done: false
  episode_len_mean: 387.02
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -38.70200000000026
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 4
  episodes_total: 3315
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 1.1224369949764677
          entropy_coeff: 0.009999999999999998
          kl: 0.011349470184043121
          policy_loss: 0.03519744409455194
          total_loss: 1.17551821536488
          vf_explained_var: 0.4307838976383209
          vf_loss: 1.1505415267414516
    num_agent_steps_sampled: 997000
    num_agent_steps_trained: 997000
    num_steps_sampled: 997000
    num_steps_trained: 997000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,997,26455.2,997000,-38.702,-23.9,-72.3,387.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 998000
  custom_metrics: {}
  date: 2021-10-29_04-27-45
  done: false
  episode_len_mean: 382.51
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -38.25100000000025
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 4
  episodes_total: 3319
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08842799866280247
          cur_lr: 5.000000000000001e-05
          entropy: 0.9990227858225504
          entropy_coeff: 0.009999999999999998
          kl: 0.02258381028008832
          policy_loss: 0.10762127422624164
          total_loss: 0.7679774026076
          vf_explained_var: 0.5969005227088928
          vf_loss: 0.6683493127425512
    num_agent_steps_sampled: 998000
    num_agent_steps_trained: 998000
    num_steps_sampled: 998000
    num_steps_trained: 998000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,998,26483.5,998000,-38.251,-23.9,-72.3,382.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 999000
  custom_metrics: {}
  date: 2021-10-29_04-28-13
  done: false
  episode_len_mean: 374.11
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -37.41100000000025
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 3
  episodes_total: 3322
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13264199799420373
          cur_lr: 5.000000000000001e-05
          entropy: 1.2182512071397569
          entropy_coeff: 0.009999999999999998
          kl: 0.04500342100688339
          policy_loss: -0.018910966647995842
          total_loss: 0.7191760246952374
          vf_explained_var: 0.7664400935173035
          vf_loss: 0.7443001455730862
    num_agent_steps_sampled: 999000
    num_agent_steps_trained: 999000
    num_steps_sampled: 999000
    num_steps_trained: 999000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,999,26511.5,999000,-37.411,-23.9,-72.3,374.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1000000
  custom_metrics: {}
  date: 2021-10-29_04-28-40
  done: false
  episode_len_mean: 371.3
  episode_media: {}
  episode_reward_max: -23.90000000000007
  episode_reward_mean: -37.130000000000244
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 4
  episodes_total: 3326
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19896299699130554
          cur_lr: 5.000000000000001e-05
          entropy: 1.0582876397503747
          entropy_coeff: 0.009999999999999998
          kl: 0.019928060782823484
          policy_loss: 0.027621678014596304
          total_loss: 1.070463416311476
          vf_explained_var: 0.4673255980014801
          vf_loss: 1.0494596769412359
    num_agent_steps_sampled: 1000000
    num_agent_steps_trained: 1000000
    num_steps_sampled: 1000000
    num_steps_trained: 1000000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1000,26538.7,1000000,-37.13,-23.9,-72.3,371.3




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1001000
  custom_metrics: {}
  date: 2021-10-29_04-29-22
  done: false
  episode_len_mean: 368.46
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -36.84600000000024
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 4
  episodes_total: 3330
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19896299699130554
          cur_lr: 5.000000000000001e-05
          entropy: 1.3615834408336216
          entropy_coeff: 0.009999999999999998
          kl: 0.019373017312186866
          policy_loss: 0.0718276600042979
          total_loss: 0.8643524138463868
          vf_explained_var: 0.6352053880691528
          vf_loss: 0.8022860719097985
    num_agent_steps_sampled: 1001000
    num_agent_steps_trained: 1001000
    num_steps_sampled: 1001000
    num_steps_trained: 1001000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1001,26580.2,1001000,-36.846,-23.5,-72.3,368.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1002000
  custom_metrics: {}
  date: 2021-10-29_04-29-51
  done: false
  episode_len_mean: 367.61
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -36.76100000000024
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 3
  episodes_total: 3333
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19896299699130554
          cur_lr: 5.000000000000001e-05
          entropy: 0.9844292475117578
          entropy_coeff: 0.009999999999999998
          kl: 0.005869595579788012
          policy_loss: -0.07327414171563254
          total_loss: 0.8443207681179047
          vf_explained_var: -0.10251534730195999
          vf_loss: 0.9262713688943122
    num_agent_steps_sampled: 1002000
    num_agent_steps_trained: 1002000
    num_steps_sampled: 1002000
    num_steps_trained: 100200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1002,26609.1,1002000,-36.761,-23.5,-72.3,367.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1003000
  custom_metrics: {}
  date: 2021-10-29_04-30-10
  done: false
  episode_len_mean: 360.15
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -36.01500000000023
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 3
  episodes_total: 3336
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19896299699130554
          cur_lr: 5.000000000000001e-05
          entropy: 1.6908257603645325
          entropy_coeff: 0.009999999999999998
          kl: 0.017974353203042408
          policy_loss: 0.06201606459087795
          total_loss: 0.8339536779456669
          vf_explained_var: -0.17059531807899475
          vf_loss: 0.7852696499062909
    num_agent_steps_sampled: 1003000
    num_agent_steps_trained: 1003000
    num_steps_sampled: 1003000
    num_steps_trained: 1003000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1003,26627.9,1003000,-36.015,-23.5,-72.3,360.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1004000
  custom_metrics: {}
  date: 2021-10-29_04-30-34
  done: false
  episode_len_mean: 358.31
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -35.83100000000023
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 3
  episodes_total: 3339
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19896299699130554
          cur_lr: 5.000000000000001e-05
          entropy: 1.3011406262715657
          entropy_coeff: 0.009999999999999998
          kl: 0.010528641151766275
          policy_loss: 0.03850499341885249
          total_loss: 0.9057061930497488
          vf_explained_var: 0.019534870982170105
          vf_loss: 0.8781177908182144
    num_agent_steps_sampled: 1004000
    num_agent_steps_trained: 1004000
    num_steps_sampled: 1004000
    num_steps_trained: 1004000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1004,26651.9,1004000,-35.831,-23.5,-72.3,358.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1005000
  custom_metrics: {}
  date: 2021-10-29_04-31-00
  done: false
  episode_len_mean: 352.74
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -35.27400000000023
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 3
  episodes_total: 3342
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19896299699130554
          cur_lr: 5.000000000000001e-05
          entropy: 1.214141892062293
          entropy_coeff: 0.009999999999999998
          kl: 0.008093703270019622
          policy_loss: 0.005363894295361307
          total_loss: 1.051438938246833
          vf_explained_var: 0.07909320294857025
          vf_loss: 1.05660612947411
    num_agent_steps_sampled: 1005000
    num_agent_steps_trained: 1005000
    num_steps_sampled: 1005000
    num_steps_trained: 1005000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1005,26678.5,1005000,-35.274,-23.5,-72.3,352.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1006000
  custom_metrics: {}
  date: 2021-10-29_04-31-21
  done: false
  episode_len_mean: 350.99
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -35.09900000000022
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 3
  episodes_total: 3345
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19896299699130554
          cur_lr: 5.000000000000001e-05
          entropy: 1.2078344179524316
          entropy_coeff: 0.009999999999999998
          kl: 0.01142442477736228
          policy_loss: -0.09058389100763532
          total_loss: 0.8832312607102923
          vf_explained_var: 0.4303940236568451
          vf_loss: 0.9836204595035977
    num_agent_steps_sampled: 1006000
    num_agent_steps_trained: 1006000
    num_steps_sampled: 1006000
    num_steps_trained: 1006000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1006,26699.7,1006000,-35.099,-23.5,-72.3,350.99


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1007000
  custom_metrics: {}
  date: 2021-10-29_04-31-46
  done: false
  episode_len_mean: 353.2
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -35.32000000000023
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 4
  episodes_total: 3349
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19896299699130554
          cur_lr: 5.000000000000001e-05
          entropy: 1.1975988003942701
          entropy_coeff: 0.009999999999999998
          kl: 0.016136811384166016
          policy_loss: -0.1273300820754634
          total_loss: 0.9371663603517745
          vf_explained_var: 0.5573115944862366
          vf_loss: 1.073261797428131
    num_agent_steps_sampled: 1007000
    num_agent_steps_trained: 1007000
    num_steps_sampled: 1007000
    num_steps_trained: 1007000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1007,26724.1,1007000,-35.32,-23.5,-72.3,353.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1008000
  custom_metrics: {}
  date: 2021-10-29_04-32-11
  done: false
  episode_len_mean: 352.07
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -35.20700000000023
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 3
  episodes_total: 3352
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19896299699130554
          cur_lr: 5.000000000000001e-05
          entropy: 1.4930877566337586
          entropy_coeff: 0.009999999999999998
          kl: 0.012161219078818498
          policy_loss: 0.1058137027753724
          total_loss: 0.8794993268118965
          vf_explained_var: 0.1405847668647766
          vf_loss: 0.7861968740820885
    num_agent_steps_sampled: 1008000
    num_agent_steps_trained: 1008000
    num_steps_sampled: 1008000
    num_steps_trained: 1008000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1008,26748.9,1008000,-35.207,-23.5,-72.3,352.07


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1009000
  custom_metrics: {}
  date: 2021-10-29_04-32-34
  done: false
  episode_len_mean: 354.81
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -35.48100000000023
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 3
  episodes_total: 3355
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19896299699130554
          cur_lr: 5.000000000000001e-05
          entropy: 1.4645824909210206
          entropy_coeff: 0.009999999999999998
          kl: 0.025100943994742882
          policy_loss: 0.07916952934530046
          total_loss: 0.7996296104457643
          vf_explained_var: 0.08178138732910156
          vf_loss: 0.7301117395361264
    num_agent_steps_sampled: 1009000
    num_agent_steps_trained: 1009000
    num_steps_sampled: 1009000
    num_steps_trained: 1009000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1009,26771.8,1009000,-35.481,-23.5,-72.3,354.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1010000
  custom_metrics: {}
  date: 2021-10-29_04-33-02
  done: false
  episode_len_mean: 347.67
  episode_media: {}
  episode_reward_max: -23.500000000000064
  episode_reward_mean: -34.767000000000216
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 4
  episodes_total: 3359
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29844449548695823
          cur_lr: 5.000000000000001e-05
          entropy: 1.073420974943373
          entropy_coeff: 0.009999999999999998
          kl: 0.007980865495835904
          policy_loss: 0.006937771456109153
          total_loss: 0.9717808187007904
          vf_explained_var: 0.5179334878921509
          vf_loss: 0.9731954084502326
    num_agent_steps_sampled: 1010000
    num_agent_steps_trained: 1010000
    num_steps_sampled: 1010000
    num_steps_trained: 1010000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1010,26800.5,1010000,-34.767,-23.5,-72.3,347.67




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1011000
  custom_metrics: {}
  date: 2021-10-29_04-33-48
  done: false
  episode_len_mean: 344.17
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -34.417000000000215
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 4
  episodes_total: 3363
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29844449548695823
          cur_lr: 5.000000000000001e-05
          entropy: 0.9482615338431464
          entropy_coeff: 0.009999999999999998
          kl: 0.006651291161952757
          policy_loss: 0.06138713964157634
          total_loss: 1.1346743888325161
          vf_explained_var: 0.2808459401130676
          vf_loss: 1.0807848175366719
    num_agent_steps_sampled: 1011000
    num_agent_steps_trained: 1011000
    num_steps_sampled: 1011000
    num_steps_trained: 1011000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1011,26846.4,1011000,-34.417,-21,-72.3,344.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1012000
  custom_metrics: {}
  date: 2021-10-29_04-34-16
  done: false
  episode_len_mean: 341.25
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -34.125000000000206
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 4
  episodes_total: 3367
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29844449548695823
          cur_lr: 5.000000000000001e-05
          entropy: 0.9454132325119442
          entropy_coeff: 0.009999999999999998
          kl: 0.004960987503964118
          policy_loss: 0.013538290560245515
          total_loss: 1.2019131859143575
          vf_explained_var: 0.32238003611564636
          vf_loss: 1.196348445945316
    num_agent_steps_sampled: 1012000
    num_agent_steps_trained: 1012000
    num_steps_sampled: 1012000
    num_steps_trained: 1012000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1012,26874,1012000,-34.125,-21,-72.3,341.25


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1013000
  custom_metrics: {}
  date: 2021-10-29_04-34-37
  done: false
  episode_len_mean: 342.97
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -34.29700000000022
  episode_reward_min: -72.30000000000017
  episodes_this_iter: 2
  episodes_total: 3369
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 1.0630496342976887
          entropy_coeff: 0.009999999999999998
          kl: 0.007948004717255822
          policy_loss: -0.14973780198229683
          total_loss: 0.5836613406737645
          vf_explained_var: 0.567497193813324
          vf_loss: 0.7428436179127959
    num_agent_steps_sampled: 1013000
    num_agent_steps_trained: 1013000
    num_steps_sampled: 1013000
    num_steps_trained: 1013000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1013,26895.4,1013000,-34.297,-21,-72.3,342.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1014000
  custom_metrics: {}
  date: 2021-10-29_04-35-03
  done: false
  episode_len_mean: 335.28
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -33.528000000000205
  episode_reward_min: -65.20000000000057
  episodes_this_iter: 4
  episodes_total: 3373
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 0.9615145285924276
          entropy_coeff: 0.009999999999999998
          kl: 0.018860686122495144
          policy_loss: 0.06096331129471461
          total_loss: 0.8517953058083853
          vf_explained_var: 0.6464206576347351
          vf_loss: 0.7976327048407661
    num_agent_steps_sampled: 1014000
    num_agent_steps_trained: 1014000
    num_steps_sampled: 1014000
    num_steps_trained: 1014000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1014,26921.1,1014000,-33.528,-21,-65.2,335.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1015000
  custom_metrics: {}
  date: 2021-10-29_04-35-25
  done: false
  episode_len_mean: 333.25
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -33.3250000000002
  episode_reward_min: -65.20000000000057
  episodes_this_iter: 3
  episodes_total: 3376
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14922224774347911
          cur_lr: 5.000000000000001e-05
          entropy: 1.0974727974997627
          entropy_coeff: 0.009999999999999998
          kl: 0.027546980559274164
          policy_loss: 0.08536734961801105
          total_loss: 0.9200008836057451
          vf_explained_var: 0.3230332136154175
          vf_loss: 0.8414976384490729
    num_agent_steps_sampled: 1015000
    num_agent_steps_trained: 1015000
    num_steps_sampled: 1015000
    num_steps_trained: 1015000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1015,26943.2,1015000,-33.325,-21,-65.2,333.25


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1016000
  custom_metrics: {}
  date: 2021-10-29_04-35-48
  done: false
  episode_len_mean: 332.56
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -33.2560000000002
  episode_reward_min: -65.20000000000057
  episodes_this_iter: 3
  episodes_total: 3379
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22383337161521868
          cur_lr: 5.000000000000001e-05
          entropy: 1.1013782024383545
          entropy_coeff: 0.009999999999999998
          kl: 0.012278983935676122
          policy_loss: 0.014449312393036153
          total_loss: 0.546374392343892
          vf_explained_var: 0.7748335003852844
          vf_loss: 0.540190419058005
    num_agent_steps_sampled: 1016000
    num_agent_steps_trained: 1016000
    num_steps_sampled: 1016000
    num_steps_trained: 1016000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1016,26965.8,1016000,-33.256,-21,-65.2,332.56


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1017000
  custom_metrics: {}
  date: 2021-10-29_04-36-09
  done: false
  episode_len_mean: 326.25
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -32.6250000000002
  episode_reward_min: -64.10000000000063
  episodes_this_iter: 3
  episodes_total: 3382
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22383337161521868
          cur_lr: 5.000000000000001e-05
          entropy: 1.5463725937737358
          entropy_coeff: 0.009999999999999998
          kl: 0.0329922659313354
          policy_loss: 0.06867756206128332
          total_loss: 0.91026913954152
          vf_explained_var: 0.317959189414978
          vf_loss: 0.8496705361952385
    num_agent_steps_sampled: 1017000
    num_agent_steps_trained: 1017000
    num_steps_sampled: 1017000
    num_steps_trained: 1017000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1017,26987,1017000,-32.625,-21,-64.1,326.25


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1018000
  custom_metrics: {}
  date: 2021-10-29_04-36-34
  done: false
  episode_len_mean: 320.21
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -32.02100000000018
  episode_reward_min: -59.80000000000058
  episodes_this_iter: 3
  episodes_total: 3385
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3357500574228281
          cur_lr: 5.000000000000001e-05
          entropy: 1.2830034520890978
          entropy_coeff: 0.009999999999999998
          kl: 0.028052678309584658
          policy_loss: 0.07991848240296046
          total_loss: 0.768714048465093
          vf_explained_var: 0.7208034992218018
          vf_loss: 0.6922069246570269
    num_agent_steps_sampled: 1018000
    num_agent_steps_trained: 1018000
    num_steps_sampled: 1018000
    num_steps_trained: 1018000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1018,27012.3,1018000,-32.021,-21,-59.8,320.21


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1019000
  custom_metrics: {}
  date: 2021-10-29_04-37-01
  done: false
  episode_len_mean: 313.38
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -31.338000000000175
  episode_reward_min: -59.80000000000058
  episodes_this_iter: 4
  episodes_total: 3389
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5036250861342422
          cur_lr: 5.000000000000001e-05
          entropy: 1.2049243450164795
          entropy_coeff: 0.009999999999999998
          kl: 0.012085591611134955
          policy_loss: -0.017626540197266474
          total_loss: 1.0934825241565704
          vf_explained_var: 0.4037570059299469
          vf_loss: 1.117071702082952
    num_agent_steps_sampled: 1019000
    num_agent_steps_trained: 1019000
    num_steps_sampled: 1019000
    num_steps_trained: 1019000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1019,27039.3,1019000,-31.338,-21,-59.8,313.38




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1020000
  custom_metrics: {}
  date: 2021-10-29_04-37-39
  done: false
  episode_len_mean: 308.52
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -30.852000000000167
  episode_reward_min: -59.80000000000058
  episodes_this_iter: 2
  episodes_total: 3391
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5036250861342422
          cur_lr: 5.000000000000001e-05
          entropy: 0.907732465532091
          entropy_coeff: 0.009999999999999998
          kl: 0.008892094980008786
          policy_loss: -0.10234993638263809
          total_loss: 0.9029106337163183
          vf_explained_var: 0.0864228904247284
          vf_loss: 1.0098596223940453
    num_agent_steps_sampled: 1020000
    num_agent_steps_trained: 1020000
    num_steps_sampled: 1020000
    num_steps_trained: 1020000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1020,27076.7,1020000,-30.852,-21,-59.8,308.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1021000
  custom_metrics: {}
  date: 2021-10-29_04-38-03
  done: false
  episode_len_mean: 303.31
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -30.331000000000166
  episode_reward_min: -55.200000000000514
  episodes_this_iter: 4
  episodes_total: 3395
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5036250861342422
          cur_lr: 5.000000000000001e-05
          entropy: 1.0956107550197178
          entropy_coeff: 0.009999999999999998
          kl: 0.004847752513872352
          policy_loss: 0.04523912568887075
          total_loss: 1.2934190471967062
          vf_explained_var: 0.3948853313922882
          vf_loss: 1.2566945645544263
    num_agent_steps_sampled: 1021000
    num_agent_steps_trained: 1021000
    num_steps_sampled: 1021000
    num_steps_trained: 1021000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1021,27101.5,1021000,-30.331,-21,-55.2,303.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1022000
  custom_metrics: {}
  date: 2021-10-29_04-38-26
  done: false
  episode_len_mean: 304.04
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -30.40400000000016
  episode_reward_min: -55.200000000000514
  episodes_this_iter: 3
  episodes_total: 3398
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2518125430671211
          cur_lr: 5.000000000000001e-05
          entropy: 1.0699654161930083
          entropy_coeff: 0.009999999999999998
          kl: 0.01050844579839918
          policy_loss: 0.013664883375167847
          total_loss: 0.9227715952528848
          vf_explained_var: 0.2177298367023468
          vf_loss: 0.9171602017349667
    num_agent_steps_sampled: 1022000
    num_agent_steps_trained: 1022000
    num_steps_sampled: 1022000
    num_steps_trained: 1022000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1022,27124.1,1022000,-30.404,-21,-55.2,304.04


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1023000
  custom_metrics: {}
  date: 2021-10-29_04-38-49
  done: false
  episode_len_mean: 306.39
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -30.639000000000166
  episode_reward_min: -55.200000000000514
  episodes_this_iter: 3
  episodes_total: 3401
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2518125430671211
          cur_lr: 5.000000000000001e-05
          entropy: 1.112233000331455
          entropy_coeff: 0.009999999999999998
          kl: 0.007661866058710132
          policy_loss: -0.03321155574586657
          total_loss: 0.896701282593939
          vf_explained_var: 0.6088657975196838
          vf_loss: 0.9391058312108119
    num_agent_steps_sampled: 1023000
    num_agent_steps_trained: 1023000
    num_steps_sampled: 1023000
    num_steps_trained: 1023000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1023,27146.5,1023000,-30.639,-21,-55.2,306.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1024000
  custom_metrics: {}
  date: 2021-10-29_04-39-16
  done: false
  episode_len_mean: 302.06
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -30.20600000000016
  episode_reward_min: -50.400000000000446
  episodes_this_iter: 3
  episodes_total: 3404
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2518125430671211
          cur_lr: 5.000000000000001e-05
          entropy: 0.9477059344450632
          entropy_coeff: 0.009999999999999998
          kl: 0.007799168126057548
          policy_loss: 0.017328984042008718
          total_loss: 1.0891110910309685
          vf_explained_var: -0.013540108688175678
          vf_loss: 1.0792952574789525
    num_agent_steps_sampled: 1024000
    num_agent_steps_trained: 1024000
    num_steps_sampled: 1024000
    num_steps_trained: 102400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1024,27173.7,1024000,-30.206,-21,-50.4,302.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1025000
  custom_metrics: {}
  date: 2021-10-29_04-39-41
  done: false
  episode_len_mean: 299.12
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -29.91200000000016
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 4
  episodes_total: 3408
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2518125430671211
          cur_lr: 5.000000000000001e-05
          entropy: 0.8604212456279331
          entropy_coeff: 0.009999999999999998
          kl: 0.005477357606605674
          policy_loss: 0.024612137509716882
          total_loss: 1.4058325701289707
          vf_explained_var: 0.24996501207351685
          vf_loss: 1.3884453839725919
    num_agent_steps_sampled: 1025000
    num_agent_steps_trained: 1025000
    num_steps_sampled: 1025000
    num_steps_trained: 1025000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1025,27198.5,1025000,-29.912,-21,-47.9,299.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1026000
  custom_metrics: {}
  date: 2021-10-29_04-40-02
  done: false
  episode_len_mean: 300.88
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -30.088000000000157
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 2
  episodes_total: 3410
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2518125430671211
          cur_lr: 5.000000000000001e-05
          entropy: 1.0075781279140048
          entropy_coeff: 0.009999999999999998
          kl: 0.015891258064481186
          policy_loss: -0.07954704148901834
          total_loss: 0.936799842119217
          vf_explained_var: 0.46356093883514404
          vf_loss: 1.0224210552871227
    num_agent_steps_sampled: 1026000
    num_agent_steps_trained: 1026000
    num_steps_sampled: 1026000
    num_steps_trained: 1026000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1026,27219.5,1026000,-30.088,-21,-47.9,300.88


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1027000
  custom_metrics: {}
  date: 2021-10-29_04-40-25
  done: false
  episode_len_mean: 302.76
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -30.276000000000163
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 4
  episodes_total: 3414
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2518125430671211
          cur_lr: 5.000000000000001e-05
          entropy: 0.9103820085525512
          entropy_coeff: 0.009999999999999998
          kl: 0.029231974418505443
          policy_loss: 0.03617237756649653
          total_loss: 1.0225864642196232
          vf_explained_var: 0.3465580344200134
          vf_loss: 0.9881569299432966
    num_agent_steps_sampled: 1027000
    num_agent_steps_trained: 1027000
    num_steps_sampled: 1027000
    num_steps_trained: 1027000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1027,27242.9,1027000,-30.276,-21,-47.9,302.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1028000
  custom_metrics: {}
  date: 2021-10-29_04-40-51
  done: false
  episode_len_mean: 304.08
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -30.408000000000158
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 3
  episodes_total: 3417
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3777188146006817
          cur_lr: 5.000000000000001e-05
          entropy: 1.0451618101861742
          entropy_coeff: 0.009999999999999998
          kl: 0.014438804745646057
          policy_loss: -0.03664556394020716
          total_loss: 1.1375867380036249
          vf_explained_var: 0.32628950476646423
          vf_loss: 1.1792301209022602
    num_agent_steps_sampled: 1028000
    num_agent_steps_trained: 1028000
    num_steps_sampled: 1028000
    num_steps_trained: 1028000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1028,27268.3,1028000,-30.408,-21,-47.9,304.08




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1029000
  custom_metrics: {}
  date: 2021-10-29_04-41-36
  done: false
  episode_len_mean: 303.81
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -30.381000000000164
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 4
  episodes_total: 3421
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3777188146006817
          cur_lr: 5.000000000000001e-05
          entropy: 1.0420199837949542
          entropy_coeff: 0.009999999999999998
          kl: 0.01926143212961653
          policy_loss: -0.033594009652733806
          total_loss: 1.3738275375631122
          vf_explained_var: 0.24362444877624512
          vf_loss: 1.410566336578793
    num_agent_steps_sampled: 1029000
    num_agent_steps_trained: 1029000
    num_steps_sampled: 1029000
    num_steps_trained: 1029000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1029,27313.5,1029000,-30.381,-21,-47.9,303.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1030000
  custom_metrics: {}
  date: 2021-10-29_04-42-03
  done: false
  episode_len_mean: 303.27
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -30.32700000000016
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 4
  episodes_total: 3425
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3777188146006817
          cur_lr: 5.000000000000001e-05
          entropy: 0.9704448693328434
          entropy_coeff: 0.009999999999999998
          kl: 0.006646881399348735
          policy_loss: 0.03516239962644047
          total_loss: 1.1407442106140984
          vf_explained_var: 0.3219713568687439
          vf_loss: 1.1127756118774415
    num_agent_steps_sampled: 1030000
    num_agent_steps_trained: 1030000
    num_steps_sampled: 1030000
    num_steps_trained: 1030000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1030,27341.1,1030000,-30.327,-21,-47.9,303.27


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1031000
  custom_metrics: {}
  date: 2021-10-29_04-42-32
  done: false
  episode_len_mean: 301.59
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -30.159000000000155
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 4
  episodes_total: 3429
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3777188146006817
          cur_lr: 5.000000000000001e-05
          entropy: 0.8570245901743571
          entropy_coeff: 0.009999999999999998
          kl: 0.02265145316192216
          policy_loss: 0.010255323681566451
          total_loss: 0.9738300522168477
          vf_explained_var: 0.2644924521446228
          vf_loss: 0.9635890927579668
    num_agent_steps_sampled: 1031000
    num_agent_steps_trained: 1031000
    num_steps_sampled: 1031000
    num_steps_trained: 1031000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1031,27369.4,1031000,-30.159,-21,-47.9,301.59


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1032000
  custom_metrics: {}
  date: 2021-10-29_04-43-00
  done: false
  episode_len_mean: 301.3
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -30.13000000000016
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 4
  episodes_total: 3433
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.8966219325860342
          entropy_coeff: 0.009999999999999998
          kl: 0.007627280250690187
          policy_loss: 0.017892523316873445
          total_loss: 1.162021071381039
          vf_explained_var: 0.15772700309753418
          vf_loss: 1.1487733317746056
    num_agent_steps_sampled: 1032000
    num_agent_steps_trained: 1032000
    num_steps_sampled: 1032000
    num_steps_trained: 1032000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1032,27397.7,1032000,-30.13,-21,-47.9,301.3


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1033000
  custom_metrics: {}
  date: 2021-10-29_04-43-29
  done: false
  episode_len_mean: 295.87
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -29.587000000000153
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 4
  episodes_total: 3437
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.8773740165763431
          entropy_coeff: 0.009999999999999998
          kl: 0.006389047509665838
          policy_loss: 0.010881762868828243
          total_loss: 0.965722182724211
          vf_explained_var: 0.3543849289417267
          vf_loss: 0.9599942611323462
    num_agent_steps_sampled: 1033000
    num_agent_steps_trained: 1033000
    num_steps_sampled: 1033000
    num_steps_trained: 1033000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1033,27426.5,1033000,-29.587,-21,-47.9,295.87


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1034000
  custom_metrics: {}
  date: 2021-10-29_04-43-56
  done: false
  episode_len_mean: 295.02
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -29.502000000000145
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 3
  episodes_total: 3440
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.9053225066926744
          entropy_coeff: 0.009999999999999998
          kl: 0.008849308914819748
          policy_loss: -0.07128588093651665
          total_loss: 1.0734731872876486
          vf_explained_var: 0.13506236672401428
          vf_loss: 1.1487984518210093
    num_agent_steps_sampled: 1034000
    num_agent_steps_trained: 1034000
    num_steps_sampled: 1034000
    num_steps_trained: 1034000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1034,27454.1,1034000,-29.502,-21,-47.9,295.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1035000
  custom_metrics: {}
  date: 2021-10-29_04-44-25
  done: false
  episode_len_mean: 294.2
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -29.420000000000144
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 4
  episodes_total: 3444
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.9564967420366075
          entropy_coeff: 0.009999999999999998
          kl: 0.015121234142679423
          policy_loss: -0.033194171223375535
          total_loss: 1.2436796267827352
          vf_explained_var: 0.25971338152885437
          vf_loss: 1.2778714001178741
    num_agent_steps_sampled: 1035000
    num_agent_steps_trained: 1035000
    num_steps_sampled: 1035000
    num_steps_trained: 1035000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1035,27482.3,1035000,-29.42,-21,-47.9,294.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1036000
  custom_metrics: {}
  date: 2021-10-29_04-44-52
  done: false
  episode_len_mean: 291.64
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -29.164000000000147
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 4
  episodes_total: 3448
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.9076490514808231
          entropy_coeff: 0.009999999999999998
          kl: 0.010124138858198212
          policy_loss: -0.08707580798202091
          total_loss: 1.2142406231827205
          vf_explained_var: 0.22917598485946655
          vf_loss: 1.3046567943361072
    num_agent_steps_sampled: 1036000
    num_agent_steps_trained: 1036000
    num_steps_sampled: 1036000
    num_steps_trained: 1036000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1036,27509.9,1036000,-29.164,-21,-47.9,291.64




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1037000
  custom_metrics: {}
  date: 2021-10-29_04-45-37
  done: false
  episode_len_mean: 289.34
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -28.934000000000133
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 4
  episodes_total: 3452
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.9227745129002465
          entropy_coeff: 0.009999999999999998
          kl: 0.0109371146481527
          policy_loss: -0.007735319683949153
          total_loss: 1.227561820215649
          vf_explained_var: 0.29202356934547424
          vf_loss: 1.2383281562063428
    num_agent_steps_sampled: 1037000
    num_agent_steps_trained: 1037000
    num_steps_sampled: 1037000
    num_steps_trained: 1037000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1037,27554.8,1037000,-28.934,-21,-47.9,289.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1038000
  custom_metrics: {}
  date: 2021-10-29_04-46-05
  done: false
  episode_len_mean: 287.79
  episode_media: {}
  episode_reward_max: -21.00000000000003
  episode_reward_mean: -28.779000000000142
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 4
  episodes_total: 3456
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.9957158943017324
          entropy_coeff: 0.009999999999999998
          kl: 0.010889987374915932
          policy_loss: -0.001832415618830257
          total_loss: 0.7980109506183201
          vf_explained_var: 0.4827153980731964
          vf_loss: 0.803630503349834
    num_agent_steps_sampled: 1038000
    num_agent_steps_trained: 1038000
    num_steps_sampled: 1038000
    num_steps_trained: 1038000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1038,27582.5,1038000,-28.779,-21,-47.9,287.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1039000
  custom_metrics: {}
  date: 2021-10-29_04-46-34
  done: false
  episode_len_mean: 287.84
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -28.784000000000137
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 4
  episodes_total: 3460
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.8984646532270644
          entropy_coeff: 0.009999999999999998
          kl: 0.006203045366647804
          policy_loss: 0.023748119672139485
          total_loss: 0.8650292598538929
          vf_explained_var: 0.4403234124183655
          vf_loss: 0.8467512806256612
    num_agent_steps_sampled: 1039000
    num_agent_steps_trained: 1039000
    num_steps_sampled: 1039000
    num_steps_trained: 1039000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1039,27611.9,1039000,-28.784,-21.7,-47.9,287.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1040000
  custom_metrics: {}
  date: 2021-10-29_04-47-03
  done: false
  episode_len_mean: 287.62
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -28.762000000000135
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 4
  episodes_total: 3464
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.8752729972203572
          entropy_coeff: 0.009999999999999998
          kl: 0.009592687965604006
          policy_loss: 0.008640677481889725
          total_loss: 0.8787079201804266
          vf_explained_var: 0.3145277798175812
          vf_loss: 0.8733849664529164
    num_agent_steps_sampled: 1040000
    num_agent_steps_trained: 1040000
    num_steps_sampled: 1040000
    num_steps_trained: 1040000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1040,27640.4,1040000,-28.762,-21.7,-47.9,287.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1041000
  custom_metrics: {}
  date: 2021-10-29_04-47-30
  done: false
  episode_len_mean: 288.24
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -28.824000000000137
  episode_reward_min: -47.90000000000041
  episodes_this_iter: 3
  episodes_total: 3467
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 1.1764904485808478
          entropy_coeff: 0.009999999999999998
          kl: 0.01695483386368911
          policy_loss: -0.0512923217482037
          total_loss: 1.0759215315183004
          vf_explained_var: 0.3459457755088806
          vf_loss: 1.129372517267863
    num_agent_steps_sampled: 1041000
    num_agent_steps_trained: 1041000
    num_steps_sampled: 1041000
    num_steps_trained: 1041000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1041,27667.3,1041000,-28.824,-21.7,-47.9,288.24


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1042000
  custom_metrics: {}
  date: 2021-10-29_04-47-58
  done: false
  episode_len_mean: 284.84
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -28.48400000000013
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 3471
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.9722617871231503
          entropy_coeff: 0.009999999999999998
          kl: 0.012231344426054176
          policy_loss: -0.04913470389114486
          total_loss: 0.6370034558905495
          vf_explained_var: 0.6333792209625244
          vf_loss: 0.6889307657877605
    num_agent_steps_sampled: 1042000
    num_agent_steps_trained: 1042000
    num_steps_sampled: 1042000
    num_steps_trained: 1042000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1042,27695.5,1042000,-28.484,-21.7,-46.4,284.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1043000
  custom_metrics: {}
  date: 2021-10-29_04-48-27
  done: false
  episode_len_mean: 281.52
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -28.152000000000125
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 3475
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 1.0627585344844395
          entropy_coeff: 0.009999999999999998
          kl: 0.008485050140722771
          policy_loss: -0.045799390930268497
          total_loss: 0.6146018746826384
          vf_explained_var: 0.7661765217781067
          vf_loss: 0.666221408214834
    num_agent_steps_sampled: 1043000
    num_agent_steps_trained: 1043000
    num_steps_sampled: 1043000
    num_steps_trained: 1043000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1043,27724.4,1043000,-28.152,-21.7,-46.4,281.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1044000
  custom_metrics: {}
  date: 2021-10-29_04-48-54
  done: false
  episode_len_mean: 280.52
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -28.05200000000013
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 3479
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 1.1406402839554681
          entropy_coeff: 0.009999999999999998
          kl: 0.013712170254203215
          policy_loss: -0.0725045999719037
          total_loss: 0.9547819528314803
          vf_explained_var: 0.6229308247566223
          vf_loss: 1.0309239470296436
    num_agent_steps_sampled: 1044000
    num_agent_steps_trained: 1044000
    num_steps_sampled: 1044000
    num_steps_trained: 1044000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1044,27751.4,1044000,-28.052,-21.7,-46.4,280.52




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1045000
  custom_metrics: {}
  date: 2021-10-29_04-49-37
  done: false
  episode_len_mean: 276.95
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -27.695000000000118
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 3483
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.7700728476047516
          entropy_coeff: 0.009999999999999998
          kl: 0.005137215833058306
          policy_loss: -0.04633289095428255
          total_loss: 0.8131377975145976
          vf_explained_var: 0.6166496872901917
          vf_loss: 0.8642607827981313
    num_agent_steps_sampled: 1045000
    num_agent_steps_trained: 1045000
    num_steps_sampled: 1045000
    num_steps_trained: 1045000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1045,27794.4,1045000,-27.695,-21.6,-46.4,276.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1046000
  custom_metrics: {}
  date: 2021-10-29_04-50-03
  done: false
  episode_len_mean: 276.49
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -27.649000000000125
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 3486
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 1.1361764397886065
          entropy_coeff: 0.009999999999999998
          kl: 0.009587864826141681
          policy_loss: -0.025657531950208874
          total_loss: 0.8900726341538959
          vf_explained_var: -0.1046232208609581
          vf_loss: 0.9216596465971735
    num_agent_steps_sampled: 1046000
    num_agent_steps_trained: 1046000
    num_steps_sampled: 1046000
    num_steps_trained: 104600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1046,27820.8,1046000,-27.649,-21.6,-46.4,276.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1047000
  custom_metrics: {}
  date: 2021-10-29_04-50-32
  done: false
  episode_len_mean: 276.6
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -27.660000000000124
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 3490
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 1.0116819593641493
          entropy_coeff: 0.009999999999999998
          kl: 0.015391310223542594
          policy_loss: 0.012542060431506899
          total_loss: 1.0206634481747945
          vf_explained_var: 0.44905149936676025
          vf_loss: 1.0095178180270725
    num_agent_steps_sampled: 1047000
    num_agent_steps_trained: 1047000
    num_steps_sampled: 1047000
    num_steps_trained: 1047000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1047,27849.1,1047000,-27.66,-21.6,-46.4,276.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1048000
  custom_metrics: {}
  date: 2021-10-29_04-51-00
  done: false
  episode_len_mean: 273.02
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -27.302000000000113
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 3494
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.7806151111920675
          entropy_coeff: 0.009999999999999998
          kl: 0.010184169836151113
          policy_loss: 0.0027256992956002555
          total_loss: 0.9917546517319149
          vf_explained_var: 0.3841681480407715
          vf_loss: 0.9910649663872189
    num_agent_steps_sampled: 1048000
    num_agent_steps_trained: 1048000
    num_steps_sampled: 1048000
    num_steps_trained: 1048000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1048,27877.5,1048000,-27.302,-21.6,-46.4,273.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1049000
  custom_metrics: {}
  date: 2021-10-29_04-51-28
  done: false
  episode_len_mean: 269.91
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.99100000000011
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 3498
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.7222600155406528
          entropy_coeff: 0.009999999999999998
          kl: 0.01339277515376504
          policy_loss: 0.01942128890918361
          total_loss: 0.9354348997275035
          vf_explained_var: 0.5704218745231628
          vf_loss: 0.915648141834471
    num_agent_steps_sampled: 1049000
    num_agent_steps_trained: 1049000
    num_steps_sampled: 1049000
    num_steps_trained: 1049000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1049,27905.6,1049000,-26.991,-21.6,-46.4,269.91


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1050000
  custom_metrics: {}
  date: 2021-10-29_04-51-56
  done: false
  episode_len_mean: 267.27
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.72700000000011
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 3
  episodes_total: 3501
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.8048305216762754
          entropy_coeff: 0.009999999999999998
          kl: 0.007842424631113844
          policy_loss: -0.10538923111226824
          total_loss: 1.0156024005677966
          vf_explained_var: 0.12365900725126266
          vf_loss: 1.1245965864923266
    num_agent_steps_sampled: 1050000
    num_agent_steps_trained: 1050000
    num_steps_sampled: 1050000
    num_steps_trained: 1050000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1050,27933,1050000,-26.727,-21.6,-46.4,267.27


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1051000
  custom_metrics: {}
  date: 2021-10-29_04-52-24
  done: false
  episode_len_mean: 267.37
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.73700000000011
  episode_reward_min: -46.40000000000039
  episodes_this_iter: 4
  episodes_total: 3505
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.7142542024453481
          entropy_coeff: 0.009999999999999998
          kl: 0.006068459418350614
          policy_loss: -0.0677388542228275
          total_loss: 1.0380252646075354
          vf_explained_var: -0.1338263601064682
          vf_loss: 1.1094683862394756
    num_agent_steps_sampled: 1051000
    num_agent_steps_trained: 1051000
    num_steps_sampled: 1051000
    num_steps_trained: 1051000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1051,27961.1,1051000,-26.737,-21.6,-46.4,267.37


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1052000
  custom_metrics: {}
  date: 2021-10-29_04-52-53
  done: false
  episode_len_mean: 263.95
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.39500000000011
  episode_reward_min: -38.300000000000274
  episodes_this_iter: 4
  episodes_total: 3509
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.7589492764737871
          entropy_coeff: 0.009999999999999998
          kl: 0.006474347806805348
          policy_loss: -0.026043116632435057
          total_loss: 0.9798302352428436
          vf_explained_var: 0.49684321880340576
          vf_loss: 1.0097946253087786
    num_agent_steps_sampled: 1052000
    num_agent_steps_trained: 1052000
    num_steps_sampled: 1052000
    num_steps_trained: 105200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1052,27990.4,1052000,-26.395,-21.6,-38.3,263.95




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1053000
  custom_metrics: {}
  date: 2021-10-29_04-53-39
  done: false
  episode_len_mean: 262.58
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.258000000000106
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3513
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5665782219010225
          cur_lr: 5.000000000000001e-05
          entropy: 0.6868785281976064
          entropy_coeff: 0.009999999999999998
          kl: 0.004488258641100471
          policy_loss: -0.06486181906527944
          total_loss: 1.0469323661592271
          vf_explained_var: 0.38521432876586914
          vf_loss: 1.116120026508967
    num_agent_steps_sampled: 1053000
    num_agent_steps_trained: 1053000
    num_steps_sampled: 1053000
    num_steps_trained: 1053000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1053,28036.4,1053000,-26.258,-21.6,-37.3,262.58


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1054000
  custom_metrics: {}
  date: 2021-10-29_04-54-06
  done: false
  episode_len_mean: 260.38
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.038000000000103
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3517
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28328911095051124
          cur_lr: 5.000000000000001e-05
          entropy: 0.9950386312272813
          entropy_coeff: 0.009999999999999998
          kl: 0.011587086327021249
          policy_loss: 0.046064430309666525
          total_loss: 0.7995620833502876
          vf_explained_var: 0.6357641220092773
          vf_loss: 0.760165551967091
    num_agent_steps_sampled: 1054000
    num_agent_steps_trained: 1054000
    num_steps_sampled: 1054000
    num_steps_trained: 1054000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1054,28063.5,1054000,-26.038,-21.6,-37.3,260.38


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1055000
  custom_metrics: {}
  date: 2021-10-29_04-54-35
  done: false
  episode_len_mean: 260.29
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.0290000000001
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3521
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28328911095051124
          cur_lr: 5.000000000000001e-05
          entropy: 0.9554985569583045
          entropy_coeff: 0.009999999999999998
          kl: 0.017687340611980712
          policy_loss: 0.021849568519327376
          total_loss: 1.0838222199016148
          vf_explained_var: 0.4197039008140564
          vf_loss: 1.066517006026374
    num_agent_steps_sampled: 1055000
    num_agent_steps_trained: 1055000
    num_steps_sampled: 1055000
    num_steps_trained: 1055000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1055,28091.8,1055000,-26.029,-21.6,-37.3,260.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1056000
  custom_metrics: {}
  date: 2021-10-29_04-55-03
  done: false
  episode_len_mean: 260.03
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.003000000000096
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3525
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28328911095051124
          cur_lr: 5.000000000000001e-05
          entropy: 1.0075591776106092
          entropy_coeff: 0.009999999999999998
          kl: 0.01780006791559951
          policy_loss: 0.011195695731374952
          total_loss: 1.1892417172590892
          vf_explained_var: 0.36781981587409973
          vf_loss: 1.1830790466732448
    num_agent_steps_sampled: 1056000
    num_agent_steps_trained: 1056000
    num_steps_sampled: 1056000
    num_steps_trained: 1056000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1056,28120.4,1056000,-26.003,-21.6,-37.3,260.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1057000
  custom_metrics: {}
  date: 2021-10-29_04-55-33
  done: false
  episode_len_mean: 259.39
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.939000000000092
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3529
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28328911095051124
          cur_lr: 5.000000000000001e-05
          entropy: 0.635052220357789
          entropy_coeff: 0.009999999999999998
          kl: 0.011476010667873817
          policy_loss: 0.01779154265920321
          total_loss: 0.8169456912411583
          vf_explained_var: 0.6322019696235657
          vf_loss: 0.8022536337375641
    num_agent_steps_sampled: 1057000
    num_agent_steps_trained: 1057000
    num_steps_sampled: 1057000
    num_steps_trained: 1057000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1057,28149.9,1057000,-25.939,-21.6,-37.3,259.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1058000
  custom_metrics: {}
  date: 2021-10-29_04-56-01
  done: false
  episode_len_mean: 259.49
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.949000000000098
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3533
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28328911095051124
          cur_lr: 5.000000000000001e-05
          entropy: 0.9740794433487786
          entropy_coeff: 0.009999999999999998
          kl: 0.005659035035076777
          policy_loss: 0.1012871572540866
          total_loss: 0.9263317492273119
          vf_explained_var: 0.6036213040351868
          vf_loss: 0.8331822435061137
    num_agent_steps_sampled: 1058000
    num_agent_steps_trained: 1058000
    num_steps_sampled: 1058000
    num_steps_trained: 1058000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1058,28178.2,1058000,-25.949,-21.6,-37.3,259.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1059000
  custom_metrics: {}
  date: 2021-10-29_04-56-30
  done: false
  episode_len_mean: 259.45
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.9450000000001
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3537
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28328911095051124
          cur_lr: 5.000000000000001e-05
          entropy: 0.8974698497189416
          entropy_coeff: 0.009999999999999998
          kl: 0.008051942243503083
          policy_loss: -0.002202610464559661
          total_loss: 0.9668437593513065
          vf_explained_var: 0.5410111546516418
          vf_loss: 0.97574004067315
    num_agent_steps_sampled: 1059000
    num_agent_steps_trained: 1059000
    num_steps_sampled: 1059000
    num_steps_trained: 1059000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1059,28206.8,1059000,-25.945,-21.6,-37.3,259.45




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1060000
  custom_metrics: {}
  date: 2021-10-29_04-57-15
  done: false
  episode_len_mean: 259.02
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.902000000000104
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3541
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28328911095051124
          cur_lr: 5.000000000000001e-05
          entropy: 0.8885043753517998
          entropy_coeff: 0.009999999999999998
          kl: 0.014823786542557274
          policy_loss: 0.10003993991348478
          total_loss: 1.1831565029091304
          vf_explained_var: 0.4281312823295593
          vf_loss: 1.0878021981981065
    num_agent_steps_sampled: 1060000
    num_agent_steps_trained: 1060000
    num_steps_sampled: 1060000
    num_steps_trained: 1060000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1060,28252,1060000,-25.902,-21.6,-37.3,259.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1061000
  custom_metrics: {}
  date: 2021-10-29_04-57-43
  done: false
  episode_len_mean: 259.45
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.945000000000096
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3545
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28328911095051124
          cur_lr: 5.000000000000001e-05
          entropy: 0.8558395514885585
          entropy_coeff: 0.009999999999999998
          kl: 0.019560504084674097
          policy_loss: 0.005963916662666532
          total_loss: 1.1045402930842505
          vf_explained_var: 0.2695479691028595
          vf_loss: 1.1015934997134738
    num_agent_steps_sampled: 1061000
    num_agent_steps_trained: 1061000
    num_steps_sampled: 1061000
    num_steps_trained: 1061000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1061,28280.3,1061000,-25.945,-21.6,-37.3,259.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1062000
  custom_metrics: {}
  date: 2021-10-29_04-58-10
  done: false
  episode_len_mean: 259.76
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -25.976000000000095
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 3
  episodes_total: 3548
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28328911095051124
          cur_lr: 5.000000000000001e-05
          entropy: 0.7906152910656399
          entropy_coeff: 0.009999999999999998
          kl: 0.00690449568536743
          policy_loss: -0.06284877384702364
          total_loss: 0.7811301757891973
          vf_explained_var: 0.36043521761894226
          vf_loss: 0.8499291353755527
    num_agent_steps_sampled: 1062000
    num_agent_steps_trained: 1062000
    num_steps_sampled: 1062000
    num_steps_trained: 1062000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1062,28306.8,1062000,-25.976,-21.6,-37.3,259.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1063000
  custom_metrics: {}
  date: 2021-10-29_04-58-38
  done: false
  episode_len_mean: 260.12
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.012000000000103
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3552
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28328911095051124
          cur_lr: 5.000000000000001e-05
          entropy: 0.7262803806198967
          entropy_coeff: 0.009999999999999998
          kl: 0.019414094178284456
          policy_loss: -0.026770556552542582
          total_loss: 0.9758543822500441
          vf_explained_var: 0.4398330748081207
          vf_loss: 1.0043879436122047
    num_agent_steps_sampled: 1063000
    num_agent_steps_trained: 1063000
    num_steps_sampled: 1063000
    num_steps_trained: 106300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1063,28335.1,1063000,-26.012,-21.6,-37.3,260.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1064000
  custom_metrics: {}
  date: 2021-10-29_04-59-03
  done: false
  episode_len_mean: 260.44
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.044000000000093
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3556
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28328911095051124
          cur_lr: 5.000000000000001e-05
          entropy: 1.0387172639369964
          entropy_coeff: 0.009999999999999998
          kl: 0.018868731472064186
          policy_loss: 0.0763729929096169
          total_loss: 0.9669779154989454
          vf_explained_var: 0.526332437992096
          vf_loss: 0.8956467807292938
    num_agent_steps_sampled: 1064000
    num_agent_steps_trained: 1064000
    num_steps_sampled: 1064000
    num_steps_trained: 1064000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1064,28360.1,1064000,-26.044,-21.6,-37.3,260.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1065000
  custom_metrics: {}
  date: 2021-10-29_04-59-29
  done: false
  episode_len_mean: 261.66
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.166000000000096
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 3
  episodes_total: 3559
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28328911095051124
          cur_lr: 5.000000000000001e-05
          entropy: 1.0845762312412262
          entropy_coeff: 0.009999999999999998
          kl: 0.015607748861076232
          policy_loss: -0.005178332577149073
          total_loss: 0.8012144492732154
          vf_explained_var: 0.12090785056352615
          vf_loss: 0.8128170485297839
    num_agent_steps_sampled: 1065000
    num_agent_steps_trained: 1065000
    num_steps_sampled: 1065000
    num_steps_trained: 10650

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1065,28386,1065000,-26.166,-21.6,-37.3,261.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1066000
  custom_metrics: {}
  date: 2021-10-29_04-59-54
  done: false
  episode_len_mean: 263.43
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.343000000000103
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 3
  episodes_total: 3562
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28328911095051124
          cur_lr: 5.000000000000001e-05
          entropy: 1.1405612197187212
          entropy_coeff: 0.009999999999999998
          kl: 0.008779350877790075
          policy_loss: -0.11432188757591777
          total_loss: 1.7302011185222201
          vf_explained_var: 0.03479800745844841
          vf_loss: 1.8534415271547107
    num_agent_steps_sampled: 1066000
    num_agent_steps_trained: 1066000
    num_steps_sampled: 1066000
    num_steps_trained: 106600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1066,28411.2,1066000,-26.343,-21.6,-37.3,263.43


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1067000
  custom_metrics: {}
  date: 2021-10-29_05-00-22
  done: false
  episode_len_mean: 263.05
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.3050000000001
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3566
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28328911095051124
          cur_lr: 5.000000000000001e-05
          entropy: 1.0829904523160723
          entropy_coeff: 0.009999999999999998
          kl: 0.025695926505762464
          policy_loss: -0.03295203290051884
          total_loss: 1.4983641584714253
          vf_explained_var: 0.1087990254163742
          vf_loss: 1.5348667389816708
    num_agent_steps_sampled: 1067000
    num_agent_steps_trained: 1067000
    num_steps_sampled: 1067000
    num_steps_trained: 1067000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1067,28438.7,1067000,-26.305,-21.6,-37.3,263.05




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1068000
  custom_metrics: {}
  date: 2021-10-29_05-01-04
  done: false
  episode_len_mean: 263.14
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.3140000000001
  episode_reward_min: -37.30000000000026
  episodes_this_iter: 4
  episodes_total: 3570
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42493366642576674
          cur_lr: 5.000000000000001e-05
          entropy: 1.1259936160511441
          entropy_coeff: 0.009999999999999998
          kl: 0.015069743787405167
          policy_loss: 0.010010691897736654
          total_loss: 1.2158889929453531
          vf_explained_var: 0.3014225959777832
          vf_loss: 1.210734587907791
    num_agent_steps_sampled: 1068000
    num_agent_steps_trained: 1068000
    num_steps_sampled: 1068000
    num_steps_trained: 1068000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1068,28480.9,1068000,-26.314,-21.6,-37.3,263.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1069000
  custom_metrics: {}
  date: 2021-10-29_05-01-30
  done: false
  episode_len_mean: 265.44
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.54400000000011
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 3
  episodes_total: 3573
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42493366642576674
          cur_lr: 5.000000000000001e-05
          entropy: 1.158867965804206
          entropy_coeff: 0.009999999999999998
          kl: 0.008956798747060348
          policy_loss: 0.12614334341552522
          total_loss: 0.4606889001197285
          vf_explained_var: 0.8554590940475464
          vf_loss: 0.34232819204529125
    num_agent_steps_sampled: 1069000
    num_agent_steps_trained: 1069000
    num_steps_sampled: 1069000
    num_steps_trained: 1069000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1069,28506.9,1069000,-26.544,-21.6,-41,265.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1070000
  custom_metrics: {}
  date: 2021-10-29_05-01-55
  done: false
  episode_len_mean: 266.83
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -26.68300000000011
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 3
  episodes_total: 3576
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42493366642576674
          cur_lr: 5.000000000000001e-05
          entropy: 1.1134209778573778
          entropy_coeff: 0.009999999999999998
          kl: 0.016309861798727433
          policy_loss: -0.09932247234715355
          total_loss: 0.47953506542576685
          vf_explained_var: 0.7542998194694519
          vf_loss: 0.5830611381265852
    num_agent_steps_sampled: 1070000
    num_agent_steps_trained: 1070000
    num_steps_sampled: 1070000
    num_steps_trained: 1070000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1070,28532.3,1070000,-26.683,-21.6,-41,266.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1071000
  custom_metrics: {}
  date: 2021-10-29_05-02-23
  done: false
  episode_len_mean: 266.97
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.697000000000106
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3580
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42493366642576674
          cur_lr: 5.000000000000001e-05
          entropy: 0.9740472555160522
          entropy_coeff: 0.009999999999999998
          kl: 0.007718603140900863
          policy_loss: -0.018250751453969212
          total_loss: 0.4020501290758451
          vf_explained_var: 0.8716822862625122
          vf_loss: 0.42676146195994485
    num_agent_steps_sampled: 1071000
    num_agent_steps_trained: 1071000
    num_steps_sampled: 1071000
    num_steps_trained: 107100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1071,28560,1071000,-26.697,-21.9,-41,266.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1072000
  custom_metrics: {}
  date: 2021-10-29_05-02-50
  done: false
  episode_len_mean: 265.65
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.565000000000108
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3584
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42493366642576674
          cur_lr: 5.000000000000001e-05
          entropy: 0.8551960951752133
          entropy_coeff: 0.009999999999999998
          kl: 0.01875126432926611
          policy_loss: -0.02604392096400261
          total_loss: 0.7688196606106228
          vf_explained_var: 0.6967372298240662
          vf_loss: 0.7954474959108565
    num_agent_steps_sampled: 1072000
    num_agent_steps_trained: 1072000
    num_steps_sampled: 1072000
    num_steps_trained: 1072000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1072,28586.5,1072000,-26.565,-21.9,-41,265.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1073000
  custom_metrics: {}
  date: 2021-10-29_05-03-17
  done: false
  episode_len_mean: 265.28
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.528000000000105
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3588
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42493366642576674
          cur_lr: 5.000000000000001e-05
          entropy: 0.8311772770351834
          entropy_coeff: 0.009999999999999998
          kl: 0.008327655371298922
          policy_loss: 0.05078415332569016
          total_loss: 0.886106726858351
          vf_explained_var: 0.6169962882995605
          vf_loss: 0.8400956372419993
    num_agent_steps_sampled: 1073000
    num_agent_steps_trained: 1073000
    num_steps_sampled: 1073000
    num_steps_trained: 1073000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1073,28613.5,1073000,-26.528,-21.9,-41,265.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1074000
  custom_metrics: {}
  date: 2021-10-29_05-03-46
  done: false
  episode_len_mean: 265.46
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.546000000000102
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3592
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42493366642576674
          cur_lr: 5.000000000000001e-05
          entropy: 0.6681280056635539
          entropy_coeff: 0.009999999999999998
          kl: 0.004454171599169222
          policy_loss: 0.04143897576464547
          total_loss: 0.9198999716175927
          vf_explained_var: 0.5669339895248413
          vf_loss: 0.8832495517200893
    num_agent_steps_sampled: 1074000
    num_agent_steps_trained: 1074000
    num_steps_sampled: 1074000
    num_steps_trained: 1074000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1074,28642.8,1074000,-26.546,-21.9,-41,265.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1075000
  custom_metrics: {}
  date: 2021-10-29_05-04-11
  done: false
  episode_len_mean: 265.93
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.593000000000103
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 3
  episodes_total: 3595
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21246683321288337
          cur_lr: 5.000000000000001e-05
          entropy: 1.0498429245418972
          entropy_coeff: 0.009999999999999998
          kl: 0.05983724396082045
          policy_loss: 0.014032313641574647
          total_loss: 0.6983601828416188
          vf_explained_var: 0.6324548125267029
          vf_loss: 0.6821128651499748
    num_agent_steps_sampled: 1075000
    num_agent_steps_trained: 1075000
    num_steps_sampled: 1075000
    num_steps_trained: 1075000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1075,28668.3,1075000,-26.593,-21.9,-41,265.93


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1076000
  custom_metrics: {}
  date: 2021-10-29_05-04-39
  done: false
  episode_len_mean: 267.05
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.70500000000011
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3599
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31870024981932504
          cur_lr: 5.000000000000001e-05
          entropy: 0.9310469263129764
          entropy_coeff: 0.009999999999999998
          kl: 0.008776978808723296
          policy_loss: 0.03948151021161013
          total_loss: 0.7798975374963548
          vf_explained_var: 0.7037719488143921
          vf_loss: 0.7469292594326867
    num_agent_steps_sampled: 1076000
    num_agent_steps_trained: 1076000
    num_steps_sampled: 1076000
    num_steps_trained: 1076000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1076,28695.4,1076000,-26.705,-21.9,-41,267.05




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1077000
  custom_metrics: {}
  date: 2021-10-29_05-05-21
  done: false
  episode_len_mean: 267.16
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.71600000000011
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 3
  episodes_total: 3602
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31870024981932504
          cur_lr: 5.000000000000001e-05
          entropy: 1.0586156096723345
          entropy_coeff: 0.009999999999999998
          kl: 0.010508476339867753
          policy_loss: -0.0916019105248981
          total_loss: 0.920707177122434
          vf_explained_var: 0.5406774282455444
          vf_loss: 1.0195461995071835
    num_agent_steps_sampled: 1077000
    num_agent_steps_trained: 1077000
    num_steps_sampled: 1077000
    num_steps_trained: 1077000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1077,28738,1077000,-26.716,-21.9,-41,267.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1078000
  custom_metrics: {}
  date: 2021-10-29_05-05-46
  done: false
  episode_len_mean: 268.87
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -26.88700000000011
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3606
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31870024981932504
          cur_lr: 5.000000000000001e-05
          entropy: 1.002551215224796
          entropy_coeff: 0.009999999999999998
          kl: 0.015176391762801878
          policy_loss: -0.02456302638683054
          total_loss: 1.0158621423774294
          vf_explained_var: 0.49573755264282227
          vf_loss: 1.0456139663855235
    num_agent_steps_sampled: 1078000
    num_agent_steps_trained: 1078000
    num_steps_sampled: 1078000
    num_steps_trained: 1078000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1078,28762.9,1078000,-26.887,-21.9,-41,268.87


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1079000
  custom_metrics: {}
  date: 2021-10-29_05-06-13
  done: false
  episode_len_mean: 270.04
  episode_media: {}
  episode_reward_max: -21.90000000000004
  episode_reward_mean: -27.004000000000115
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 3
  episodes_total: 3609
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31870024981932504
          cur_lr: 5.000000000000001e-05
          entropy: 0.9289779735936059
          entropy_coeff: 0.009999999999999998
          kl: 0.005342199052137491
          policy_loss: 0.03326952151126332
          total_loss: 1.0897191905313068
          vf_explained_var: -0.19003857672214508
          vf_loss: 1.0640368973215422
    num_agent_steps_sampled: 1079000
    num_agent_steps_trained: 1079000
    num_steps_sampled: 1079000
    num_steps_trained: 1079000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1079,28789.8,1079000,-27.004,-21.9,-41,270.04


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1080000
  custom_metrics: {}
  date: 2021-10-29_05-06-40
  done: false
  episode_len_mean: 270.43
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -27.043000000000117
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3613
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31870024981932504
          cur_lr: 5.000000000000001e-05
          entropy: 0.9722609659036
          entropy_coeff: 0.009999999999999998
          kl: 0.01622063614594964
          policy_loss: 0.0779020331799984
          total_loss: 1.2493446244133843
          vf_explained_var: 0.48334184288978577
          vf_loss: 1.1759956823454962
    num_agent_steps_sampled: 1080000
    num_agent_steps_trained: 1080000
    num_steps_sampled: 1080000
    num_steps_trained: 1080000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1080,28817.1,1080000,-27.043,-22.9,-41,270.43


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1081000
  custom_metrics: {}
  date: 2021-10-29_05-07-07
  done: false
  episode_len_mean: 271.13
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -27.113000000000117
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3617
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31870024981932504
          cur_lr: 5.000000000000001e-05
          entropy: 1.0162914978133308
          entropy_coeff: 0.009999999999999998
          kl: 0.012896021401292668
          policy_loss: 0.03510194582243761
          total_loss: 0.7730846977896161
          vf_explained_var: 0.724461019039154
          vf_loss: 0.7440357082419925
    num_agent_steps_sampled: 1081000
    num_agent_steps_trained: 1081000
    num_steps_sampled: 1081000
    num_steps_trained: 1081000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1081,28843.6,1081000,-27.113,-22.9,-41,271.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1082000
  custom_metrics: {}
  date: 2021-10-29_05-07-34
  done: false
  episode_len_mean: 271.36
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -27.136000000000113
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 3
  episodes_total: 3620
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31870024981932504
          cur_lr: 5.000000000000001e-05
          entropy: 0.9494938678211636
          entropy_coeff: 0.009999999999999998
          kl: 0.007920750306158592
          policy_loss: -0.11862931417094337
          total_loss: 0.9259801851378546
          vf_explained_var: 0.5039416551589966
          vf_loss: 1.0515800913174946
    num_agent_steps_sampled: 1082000
    num_agent_steps_trained: 1082000
    num_steps_sampled: 1082000
    num_steps_trained: 1082000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1082,28870.8,1082000,-27.136,-22.9,-41,271.36


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1083000
  custom_metrics: {}
  date: 2021-10-29_05-07-59
  done: false
  episode_len_mean: 272.6
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -27.26000000000012
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3624
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31870024981932504
          cur_lr: 5.000000000000001e-05
          entropy: 1.1733210106690726
          entropy_coeff: 0.009999999999999998
          kl: 0.011245271814388422
          policy_loss: 0.09711346013678444
          total_loss: 0.7960020522276561
          vf_explained_var: 0.7189204096794128
          vf_loss: 0.7070379303561316
    num_agent_steps_sampled: 1083000
    num_agent_steps_trained: 1083000
    num_steps_sampled: 1083000
    num_steps_trained: 1083000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1083,28895.8,1083000,-27.26,-22.9,-41,272.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1084000
  custom_metrics: {}
  date: 2021-10-29_05-08-26
  done: false
  episode_len_mean: 273.42
  episode_media: {}
  episode_reward_max: -22.900000000000055
  episode_reward_mean: -27.342000000000116
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 3
  episodes_total: 3627
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31870024981932504
          cur_lr: 5.000000000000001e-05
          entropy: 1.2069936765564813
          entropy_coeff: 0.009999999999999998
          kl: 0.009783540375319906
          policy_loss: -0.04864163961675432
          total_loss: 0.7868479076359007
          vf_explained_var: 0.6571556925773621
          vf_loss: 0.8444414652056165
    num_agent_steps_sampled: 1084000
    num_agent_steps_trained: 1084000
    num_steps_sampled: 1084000
    num_steps_trained: 1084000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1084,28922.7,1084000,-27.342,-22.9,-41,273.42




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1085000
  custom_metrics: {}
  date: 2021-10-29_05-09-11
  done: false
  episode_len_mean: 274.17
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.417000000000122
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3631
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31870024981932504
          cur_lr: 5.000000000000001e-05
          entropy: 0.925581572453181
          entropy_coeff: 0.009999999999999998
          kl: 0.009537807534452724
          policy_loss: -0.09233324395285712
          total_loss: 0.7961003995603986
          vf_explained_var: 0.7131668329238892
          vf_loss: 0.8946497579415639
    num_agent_steps_sampled: 1085000
    num_agent_steps_trained: 1085000
    num_steps_sampled: 1085000
    num_steps_trained: 1085000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1085,28967.8,1085000,-27.417,-21.8,-41,274.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1086000
  custom_metrics: {}
  date: 2021-10-29_05-09-37
  done: false
  episode_len_mean: 275.42
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.542000000000122
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3635
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31870024981932504
          cur_lr: 5.000000000000001e-05
          entropy: 1.099712575144238
          entropy_coeff: 0.009999999999999998
          kl: 0.009768419337172436
          policy_loss: 0.048141349107027054
          total_loss: 0.5400514740910795
          vf_explained_var: 0.7970188856124878
          vf_loss: 0.49979405287239287
    num_agent_steps_sampled: 1086000
    num_agent_steps_trained: 1086000
    num_steps_sampled: 1086000
    num_steps_trained: 1086000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1086,28993.3,1086000,-27.542,-21.8,-41,275.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1087000
  custom_metrics: {}
  date: 2021-10-29_05-10-04
  done: false
  episode_len_mean: 276.2
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.620000000000122
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 3
  episodes_total: 3638
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31870024981932504
          cur_lr: 5.000000000000001e-05
          entropy: 0.9325038757589128
          entropy_coeff: 0.009999999999999998
          kl: 0.008734945704648014
          policy_loss: -0.10857585246364275
          total_loss: 0.927165710263782
          vf_explained_var: 0.50627201795578
          vf_loss: 1.0422827694151136
    num_agent_steps_sampled: 1087000
    num_agent_steps_trained: 1087000
    num_steps_sampled: 1087000
    num_steps_trained: 1087000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1087,29020.7,1087000,-27.62,-21.8,-41,276.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1088000
  custom_metrics: {}
  date: 2021-10-29_05-10-30
  done: false
  episode_len_mean: 276.89
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.689000000000124
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3642
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.31870024981932504
          cur_lr: 5.000000000000001e-05
          entropy: 1.0619989514350892
          entropy_coeff: 0.009999999999999998
          kl: 0.02760645812210293
          policy_loss: 0.06710577516092195
          total_loss: 0.6146246069007449
          vf_explained_var: 0.835832953453064
          vf_loss: 0.5493406414985657
    num_agent_steps_sampled: 1088000
    num_agent_steps_trained: 1088000
    num_steps_sampled: 1088000
    num_steps_trained: 1088000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1088,29046.6,1088000,-27.689,-21.8,-41,276.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1089000
  custom_metrics: {}
  date: 2021-10-29_05-10-56
  done: false
  episode_len_mean: 277.71
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.771000000000125
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 3
  episodes_total: 3645
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.47805037472898765
          cur_lr: 5.000000000000001e-05
          entropy: 0.9715975145498912
          entropy_coeff: 0.009999999999999998
          kl: 0.011111088292118154
          policy_loss: -0.02498788730137878
          total_loss: 0.6731448918581009
          vf_explained_var: 0.7264130711555481
          vf_loss: 0.7025370882617102
    num_agent_steps_sampled: 1089000
    num_agent_steps_trained: 1089000
    num_steps_sampled: 1089000
    num_steps_trained: 1089000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1089,29072,1089000,-27.771,-21.8,-41,277.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1090000
  custom_metrics: {}
  date: 2021-10-29_05-11-22
  done: false
  episode_len_mean: 278.41
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.84100000000012
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3649
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.47805037472898765
          cur_lr: 5.000000000000001e-05
          entropy: 0.9128318753507402
          entropy_coeff: 0.009999999999999998
          kl: 0.009727677714917525
          policy_loss: 0.027288516548772655
          total_loss: 1.3765966958469815
          vf_explained_var: 0.4169806241989136
          vf_loss: 1.3537861830658382
    num_agent_steps_sampled: 1090000
    num_agent_steps_trained: 1090000
    num_steps_sampled: 1090000
    num_steps_trained: 1090000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1090,29098.1,1090000,-27.841,-21.8,-41,278.41


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1091000
  custom_metrics: {}
  date: 2021-10-29_05-11-48
  done: false
  episode_len_mean: 278.68
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.868000000000123
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3653
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.47805037472898765
          cur_lr: 5.000000000000001e-05
          entropy: 0.9715068068769243
          entropy_coeff: 0.009999999999999998
          kl: 0.004963833009931307
          policy_loss: -7.36782948176066e-05
          total_loss: 1.2686657177077398
          vf_explained_var: 0.34859952330589294
          vf_loss: 1.2760815024375916
    num_agent_steps_sampled: 1091000
    num_agent_steps_trained: 1091000
    num_steps_sampled: 1091000
    num_steps_trained: 109100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1091,29125,1091000,-27.868,-21.8,-41,278.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1092000
  custom_metrics: {}
  date: 2021-10-29_05-12-15
  done: false
  episode_len_mean: 278.79
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.87900000000013
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 3
  episodes_total: 3656
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23902518736449382
          cur_lr: 5.000000000000001e-05
          entropy: 1.1098796056376563
          entropy_coeff: 0.009999999999999998
          kl: 0.04953408025839615
          policy_loss: 0.03672836638159222
          total_loss: 0.655210325287448
          vf_explained_var: 0.6409853100776672
          vf_loss: 0.6177408585531844
    num_agent_steps_sampled: 1092000
    num_agent_steps_trained: 1092000
    num_steps_sampled: 1092000
    num_steps_trained: 1092000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1092,29151.2,1092000,-27.879,-21.8,-41,278.79




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1093000
  custom_metrics: {}
  date: 2021-10-29_05-12-57
  done: false
  episode_len_mean: 278.07
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.80700000000012
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3660
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3585377810467407
          cur_lr: 5.000000000000001e-05
          entropy: 1.1718103210131328
          entropy_coeff: 0.009999999999999998
          kl: 0.019049915265059763
          policy_loss: 0.01929779706729783
          total_loss: 0.864828708436754
          vf_explained_var: 0.6185548901557922
          vf_loss: 0.850418895483017
    num_agent_steps_sampled: 1093000
    num_agent_steps_trained: 1093000
    num_steps_sampled: 1093000
    num_steps_trained: 1093000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1093,29193.3,1093000,-27.807,-21.8,-41,278.07


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1094000
  custom_metrics: {}
  date: 2021-10-29_05-13-25
  done: false
  episode_len_mean: 277.58
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.758000000000123
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 3
  episodes_total: 3663
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3585377810467407
          cur_lr: 5.000000000000001e-05
          entropy: 1.0760566724671259
          entropy_coeff: 0.009999999999999998
          kl: 0.036271578538026636
          policy_loss: -0.010771879719363319
          total_loss: 0.923715763952997
          vf_explained_var: 0.5451697707176208
          vf_loss: 0.9322434802850087
    num_agent_steps_sampled: 1094000
    num_agent_steps_trained: 1094000
    num_steps_sampled: 1094000
    num_steps_trained: 1094000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1094,29220.9,1094000,-27.758,-21.8,-41,277.58


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1095000
  custom_metrics: {}
  date: 2021-10-29_05-13-51
  done: false
  episode_len_mean: 278.8
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.880000000000123
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 4
  episodes_total: 3667
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5378066715701112
          cur_lr: 5.000000000000001e-05
          entropy: 1.1883358081181845
          entropy_coeff: 0.009999999999999998
          kl: 0.01491584857082893
          policy_loss: -0.08048945276273621
          total_loss: 0.7399282170666589
          vf_explained_var: 0.6612825989723206
          vf_loss: 0.8242791619565751
    num_agent_steps_sampled: 1095000
    num_agent_steps_trained: 1095000
    num_steps_sampled: 1095000
    num_steps_trained: 1095000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1095,29247.4,1095000,-27.88,-21.8,-41,278.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1096000
  custom_metrics: {}
  date: 2021-10-29_05-14-17
  done: false
  episode_len_mean: 279.63
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.963000000000125
  episode_reward_min: -41.00000000000031
  episodes_this_iter: 3
  episodes_total: 3670
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5378066715701112
          cur_lr: 5.000000000000001e-05
          entropy: 1.4543340033955043
          entropy_coeff: 0.009999999999999998
          kl: 0.0188631337321644
          policy_loss: 0.029103437728352017
          total_loss: 0.34935170635581014
          vf_explained_var: 0.784342348575592
          vf_loss: 0.32464689521325957
    num_agent_steps_sampled: 1096000
    num_agent_steps_trained: 1096000
    num_steps_sampled: 1096000
    num_steps_trained: 1096000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1096,29273,1096000,-27.963,-21.8,-41,279.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1097000
  custom_metrics: {}
  date: 2021-10-29_05-14-44
  done: false
  episode_len_mean: 277.63
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.76300000000012
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3674
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5378066715701112
          cur_lr: 5.000000000000001e-05
          entropy: 0.9634674774275885
          entropy_coeff: 0.009999999999999998
          kl: 0.023035555579561478
          policy_loss: 0.05680985649426778
          total_loss: 0.9928053975105285
          vf_explained_var: 0.6064713001251221
          vf_loss: 0.9332415382067363
    num_agent_steps_sampled: 1097000
    num_agent_steps_trained: 1097000
    num_steps_sampled: 1097000
    num_steps_trained: 1097000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1097,29300.1,1097000,-27.763,-21.8,-35.1,277.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1098000
  custom_metrics: {}
  date: 2021-10-29_05-15-11
  done: false
  episode_len_mean: 277.22
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.722000000000115
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 3
  episodes_total: 3677
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8067100073551666
          cur_lr: 5.000000000000001e-05
          entropy: 1.0536163965861003
          entropy_coeff: 0.009999999999999998
          kl: 0.010909630071004588
          policy_loss: -0.09009821232822206
          total_loss: 0.8824617673953374
          vf_explained_var: 0.6051803231239319
          vf_loss: 0.9742952406406402
    num_agent_steps_sampled: 1098000
    num_agent_steps_trained: 1098000
    num_steps_sampled: 1098000
    num_steps_trained: 1098000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1098,29327.5,1098000,-27.722,-21.8,-35.1,277.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1099000
  custom_metrics: {}
  date: 2021-10-29_05-15-38
  done: false
  episode_len_mean: 277.45
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.745000000000122
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3681
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8067100073551666
          cur_lr: 5.000000000000001e-05
          entropy: 0.9249926851855383
          entropy_coeff: 0.009999999999999998
          kl: 0.011383706315637059
          policy_loss: -0.039322187172042
          total_loss: 0.5054221207896868
          vf_explained_var: 0.7881606817245483
          vf_loss: 0.5448108808861838
    num_agent_steps_sampled: 1099000
    num_agent_steps_trained: 1099000
    num_steps_sampled: 1099000
    num_steps_trained: 1099000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1099,29354.7,1099000,-27.745,-21.8,-35.1,277.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1100000
  custom_metrics: {}
  date: 2021-10-29_05-16-07
  done: false
  episode_len_mean: 277.7
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.770000000000117
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3685
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8067100073551666
          cur_lr: 5.000000000000001e-05
          entropy: 1.0040455169147915
          entropy_coeff: 0.009999999999999998
          kl: 0.009741233750318696
          policy_loss: 0.019866768684652116
          total_loss: 0.8517014920711518
          vf_explained_var: 0.6382941603660583
          vf_loss: 0.8340168449613783
    num_agent_steps_sampled: 1100000
    num_agent_steps_trained: 1100000
    num_steps_sampled: 1100000
    num_steps_trained: 1100000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1100,29383,1100000,-27.77,-21.8,-35.1,277.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1101000
  custom_metrics: {}
  date: 2021-10-29_05-16-32
  done: false
  episode_len_mean: 278.1
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.810000000000127
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3689
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8067100073551666
          cur_lr: 5.000000000000001e-05
          entropy: 1.0329708788130019
          entropy_coeff: 0.009999999999999998
          kl: 0.009758055075543885
          policy_loss: 0.014070218635929955
          total_loss: 0.5250467489163081
          vf_explained_var: 0.8405464291572571
          vf_loss: 0.5134343213505215
    num_agent_steps_sampled: 1101000
    num_agent_steps_trained: 1101000
    num_steps_sampled: 1101000
    num_steps_trained: 1101000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1101,29408.3,1101000,-27.81,-21.8,-35.1,278.1




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1102000
  custom_metrics: {}
  date: 2021-10-29_05-17-16
  done: false
  episode_len_mean: 279.23
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.923000000000126
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 3
  episodes_total: 3692
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8067100073551666
          cur_lr: 5.000000000000001e-05
          entropy: 1.2138142479790581
          entropy_coeff: 0.009999999999999998
          kl: 0.0040303464847179764
          policy_loss: -0.02417313332358996
          total_loss: 0.8844361543655396
          vf_explained_var: 0.38394325971603394
          vf_loss: 0.9174961157970958
    num_agent_steps_sampled: 1102000
    num_agent_steps_trained: 1102000
    num_steps_sampled: 1102000
    num_steps_trained: 1102000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1102,29451.9,1102000,-27.923,-21.8,-35.1,279.23


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1103000
  custom_metrics: {}
  date: 2021-10-29_05-17-43
  done: false
  episode_len_mean: 278.19
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.819000000000123
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3696
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4033550036775833
          cur_lr: 5.000000000000001e-05
          entropy: 0.8634536034531064
          entropy_coeff: 0.009999999999999998
          kl: 0.014304709751222175
          policy_loss: 0.03577424701717165
          total_loss: 0.7780158076021406
          vf_explained_var: 0.7299281358718872
          vf_loss: 0.7451062265369627
    num_agent_steps_sampled: 1103000
    num_agent_steps_trained: 1103000
    num_steps_sampled: 1103000
    num_steps_trained: 1103000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1103,29479,1103000,-27.819,-21.8,-35.1,278.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1104000
  custom_metrics: {}
  date: 2021-10-29_05-18-10
  done: false
  episode_len_mean: 278.51
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.851000000000127
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3700
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4033550036775833
          cur_lr: 5.000000000000001e-05
          entropy: 0.9355109281010098
          entropy_coeff: 0.009999999999999998
          kl: 0.016249819260824475
          policy_loss: -0.0207449556224876
          total_loss: 0.9191517273585001
          vf_explained_var: 0.5632772445678711
          vf_loss: 0.942697341574563
    num_agent_steps_sampled: 1104000
    num_agent_steps_trained: 1104000
    num_steps_sampled: 1104000
    num_steps_trained: 1104000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1104,29506.4,1104000,-27.851,-21.8,-35.1,278.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1105000
  custom_metrics: {}
  date: 2021-10-29_05-18-39
  done: false
  episode_len_mean: 276.53
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.65300000000012
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3704
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4033550036775833
          cur_lr: 5.000000000000001e-05
          entropy: 0.7226570149262747
          entropy_coeff: 0.009999999999999998
          kl: 0.004614413767823875
          policy_loss: 0.002877508021063275
          total_loss: 0.9496110684341854
          vf_explained_var: 0.5175491571426392
          vf_loss: 0.9520988755755955
    num_agent_steps_sampled: 1105000
    num_agent_steps_trained: 1105000
    num_steps_sampled: 1105000
    num_steps_trained: 1105000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1105,29535.5,1105000,-27.653,-21.8,-35.1,276.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1106000
  custom_metrics: {}
  date: 2021-10-29_05-19-08
  done: false
  episode_len_mean: 275.39
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.53900000000012
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 3
  episodes_total: 3707
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20167750183879166
          cur_lr: 5.000000000000001e-05
          entropy: 0.8410863359769185
          entropy_coeff: 0.009999999999999998
          kl: 0.013649921016272475
          policy_loss: -0.10548818326658672
          total_loss: 1.0044133941332498
          vf_explained_var: 0.4950006604194641
          vf_loss: 1.1155595541000367
    num_agent_steps_sampled: 1106000
    num_agent_steps_trained: 1106000
    num_steps_sampled: 1106000
    num_steps_trained: 1106000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1106,29564.3,1106000,-27.539,-21.8,-35.1,275.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1107000
  custom_metrics: {}
  date: 2021-10-29_05-19-36
  done: false
  episode_len_mean: 274.23
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.423000000000126
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3711
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20167750183879166
          cur_lr: 5.000000000000001e-05
          entropy: 0.6666364106867049
          entropy_coeff: 0.009999999999999998
          kl: 0.012288130260171847
          policy_loss: -0.12947556384735637
          total_loss: 1.0106196489599015
          vf_explained_var: 0.5321016311645508
          vf_loss: 1.144283323817783
    num_agent_steps_sampled: 1107000
    num_agent_steps_trained: 1107000
    num_steps_sampled: 1107000
    num_steps_trained: 1107000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1107,29591.7,1107000,-27.423,-21.8,-35.1,274.23


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1108000
  custom_metrics: {}
  date: 2021-10-29_05-20-00
  done: false
  episode_len_mean: 275.81
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.58100000000012
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3715
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20167750183879166
          cur_lr: 5.000000000000001e-05
          entropy: 1.3348853866259256
          entropy_coeff: 0.009999999999999998
          kl: 0.04367408357046006
          policy_loss: -0.10253354112307231
          total_loss: 0.4147114134497113
          vf_explained_var: 0.8671632409095764
          vf_loss: 0.5217857225073709
    num_agent_steps_sampled: 1108000
    num_agent_steps_trained: 1108000
    num_steps_sampled: 1108000
    num_steps_trained: 1108000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1108,29616,1108000,-27.581,-21.8,-35.1,275.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1109000
  custom_metrics: {}
  date: 2021-10-29_05-20-25
  done: false
  episode_len_mean: 276.48
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.648000000000117
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 3
  episodes_total: 3718
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3025162527581875
          cur_lr: 5.000000000000001e-05
          entropy: 1.3562111006842719
          entropy_coeff: 0.009999999999999998
          kl: 0.028867042430484602
          policy_loss: 0.015055881730384297
          total_loss: 0.5078691330220965
          vf_explained_var: 0.7510710954666138
          vf_loss: 0.4976426098909643
    num_agent_steps_sampled: 1109000
    num_agent_steps_trained: 1109000
    num_steps_sampled: 1109000
    num_steps_trained: 1109000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1109,29640.9,1109000,-27.648,-21.8,-35.1,276.48




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1110000
  custom_metrics: {}
  date: 2021-10-29_05-21-12
  done: false
  episode_len_mean: 275.65
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.565000000000126
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3722
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4537743791372811
          cur_lr: 5.000000000000001e-05
          entropy: 0.7166807734304004
          entropy_coeff: 0.009999999999999998
          kl: 0.012031346512347222
          policy_loss: 0.0932132277223799
          total_loss: 0.8175375004609425
          vf_explained_var: 0.6109989881515503
          vf_loss: 0.7260315583811866
    num_agent_steps_sampled: 1110000
    num_agent_steps_trained: 1110000
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1110,29688,1110000,-27.565,-21.8,-35.1,275.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1111000
  custom_metrics: {}
  date: 2021-10-29_05-21-40
  done: false
  episode_len_mean: 275.16
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.516000000000123
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3726
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4537743791372811
          cur_lr: 5.000000000000001e-05
          entropy: 0.9849795983897315
          entropy_coeff: 0.009999999999999998
          kl: 0.03432716616332275
          policy_loss: 0.03449631358186404
          total_loss: 0.8683825814061694
          vf_explained_var: 0.45507270097732544
          vf_loss: 0.8281592749887042
    num_agent_steps_sampled: 1111000
    num_agent_steps_trained: 1111000
    num_steps_sampled: 1111000
    num_steps_trained: 1111000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1111,29715.9,1111000,-27.516,-21.8,-35.1,275.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1112000
  custom_metrics: {}
  date: 2021-10-29_05-22-08
  done: false
  episode_len_mean: 275.1
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -27.510000000000122
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3730
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.680661568705922
          cur_lr: 5.000000000000001e-05
          entropy: 1.0704535020722283
          entropy_coeff: 0.009999999999999998
          kl: 0.0061109670028170315
          policy_loss: 0.0047044011453787485
          total_loss: 0.9774763253000047
          vf_explained_var: 0.3256537914276123
          vf_loss: 0.9793169677257538
    num_agent_steps_sampled: 1112000
    num_agent_steps_trained: 1112000
    num_steps_sampled: 1112000
    num_steps_trained: 1112000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1112,29744.2,1112000,-27.51,-22.8,-35.1,275.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1113000
  custom_metrics: {}
  date: 2021-10-29_05-22-34
  done: false
  episode_len_mean: 274.98
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -27.49800000000012
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 3
  episodes_total: 3733
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.680661568705922
          cur_lr: 5.000000000000001e-05
          entropy: 1.446980404191547
          entropy_coeff: 0.009999999999999998
          kl: 0.0133566996070157
          policy_loss: 0.030260061886575488
          total_loss: 0.5741798571414418
          vf_explained_var: 0.6054461002349854
          vf_loss: 0.5492982038193279
    num_agent_steps_sampled: 1113000
    num_agent_steps_trained: 1113000
    num_steps_sampled: 1113000
    num_steps_trained: 1113000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1113,29770,1113000,-27.498,-22.8,-35.1,274.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1114000
  custom_metrics: {}
  date: 2021-10-29_05-23-00
  done: false
  episode_len_mean: 274.78
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -27.47800000000012
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 3
  episodes_total: 3736
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.680661568705922
          cur_lr: 5.000000000000001e-05
          entropy: 1.0760565472973718
          entropy_coeff: 0.009999999999999998
          kl: 0.01419698683587948
          policy_loss: -0.07164692659344939
          total_loss: 0.7494699954986572
          vf_explained_var: 0.608208954334259
          vf_loss: 0.822214146455129
    num_agent_steps_sampled: 1114000
    num_agent_steps_trained: 1114000
    num_steps_sampled: 1114000
    num_steps_trained: 1114000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1114,29795.6,1114000,-27.478,-22.8,-35.1,274.78


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1115000
  custom_metrics: {}
  date: 2021-10-29_05-23-27
  done: false
  episode_len_mean: 274.83
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.48300000000012
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3740
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.680661568705922
          cur_lr: 5.000000000000001e-05
          entropy: 1.0968614684210882
          entropy_coeff: 0.009999999999999998
          kl: 0.012242182297865235
          policy_loss: -0.029405508273177675
          total_loss: 0.8615820321771833
          vf_explained_var: 0.4431663155555725
          vf_loss: 0.893623376554913
    num_agent_steps_sampled: 1115000
    num_agent_steps_trained: 1115000
    num_steps_sampled: 1115000
    num_steps_trained: 1115000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1115,29822.9,1115000,-27.483,-22.1,-35.1,274.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1116000
  custom_metrics: {}
  date: 2021-10-29_05-23-53
  done: false
  episode_len_mean: 274.89
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.489000000000125
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3744
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.680661568705922
          cur_lr: 5.000000000000001e-05
          entropy: 1.1816961758666569
          entropy_coeff: 0.009999999999999998
          kl: 0.016015123832211327
          policy_loss: 0.014845127363999684
          total_loss: 0.668666625685162
          vf_explained_var: 0.6774097681045532
          vf_loss: 0.6547375837961833
    num_agent_steps_sampled: 1116000
    num_agent_steps_trained: 1116000
    num_steps_sampled: 1116000
    num_steps_trained: 1116000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1116,29848.7,1116000,-27.489,-22.1,-35.1,274.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1117000
  custom_metrics: {}
  date: 2021-10-29_05-24-22
  done: false
  episode_len_mean: 273.23
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -27.32300000000012
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3748
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.680661568705922
          cur_lr: 5.000000000000001e-05
          entropy: 0.6716602464516958
          entropy_coeff: 0.009999999999999998
          kl: 0.008763911215489707
          policy_loss: -0.010262225651078753
          total_loss: 1.0265402019023895
          vf_explained_var: 0.3643515408039093
          vf_loss: 1.0375537759727902
    num_agent_steps_sampled: 1117000
    num_agent_steps_trained: 1117000
    num_steps_sampled: 1117000
    num_steps_trained: 1117000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1117,29877.5,1117000,-27.323,-22.1,-35.1,273.23




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1118000
  custom_metrics: {}
  date: 2021-10-29_05-25-09
  done: false
  episode_len_mean: 272.07
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.207000000000118
  episode_reward_min: -35.10000000000023
  episodes_this_iter: 4
  episodes_total: 3752
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.680661568705922
          cur_lr: 5.000000000000001e-05
          entropy: 0.6724446806642744
          entropy_coeff: 0.009999999999999998
          kl: 0.00720150857592322
          policy_loss: 0.08328708037734031
          total_loss: 1.0950512919161055
          vf_explained_var: 0.3766153156757355
          vf_loss: 1.0135868787765503
    num_agent_steps_sampled: 1118000
    num_agent_steps_trained: 1118000
    num_steps_sampled: 1118000
    num_steps_trained: 1118000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1118,29925.3,1118000,-27.207,-21.5,-35.1,272.07


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1119000
  custom_metrics: {}
  date: 2021-10-29_05-25-35
  done: false
  episode_len_mean: 271.41
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.14100000000012
  episode_reward_min: -34.40000000000022
  episodes_this_iter: 4
  episodes_total: 3756
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.680661568705922
          cur_lr: 5.000000000000001e-05
          entropy: 0.8080586045980453
          entropy_coeff: 0.009999999999999998
          kl: 0.027416097128025285
          policy_loss: 0.004355668028195699
          total_loss: 1.0279949413405525
          vf_explained_var: 0.5172957181930542
          vf_loss: 1.013058778974745
    num_agent_steps_sampled: 1119000
    num_agent_steps_trained: 1119000
    num_steps_sampled: 1119000
    num_steps_trained: 1119000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1119,29951.2,1119000,-27.141,-21.5,-34.4,271.41


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1120000
  custom_metrics: {}
  date: 2021-10-29_05-26-04
  done: false
  episode_len_mean: 270.25
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.025000000000112
  episode_reward_min: -34.40000000000022
  episodes_this_iter: 4
  episodes_total: 3760
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0209923530588831
          cur_lr: 5.000000000000001e-05
          entropy: 0.3758864588207669
          entropy_coeff: 0.009999999999999998
          kl: 0.0012114013756244744
          policy_loss: 0.055936674608124626
          total_loss: 0.8706590705447727
          vf_explained_var: 0.3595789074897766
          vf_loss: 0.8172444383303324
    num_agent_steps_sampled: 1120000
    num_agent_steps_trained: 1120000
    num_steps_sampled: 1120000
    num_steps_trained: 1120000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1120,29980.2,1120000,-27.025,-21.5,-34.4,270.25


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1121000
  custom_metrics: {}
  date: 2021-10-29_05-26-32
  done: false
  episode_len_mean: 269.97
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -26.997000000000114
  episode_reward_min: -34.40000000000022
  episodes_this_iter: 3
  episodes_total: 3763
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5104961765294416
          cur_lr: 5.000000000000001e-05
          entropy: 0.725190336836709
          entropy_coeff: 0.009999999999999998
          kl: 0.003985905516127117
          policy_loss: -0.039461278998189506
          total_loss: 0.6932599984937244
          vf_explained_var: 0.4908829629421234
          vf_loss: 0.7379383895132277
    num_agent_steps_sampled: 1121000
    num_agent_steps_trained: 1121000
    num_steps_sampled: 1121000
    num_steps_trained: 1121000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1121,30007.4,1121000,-26.997,-21.5,-34.4,269.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1122000
  custom_metrics: {}
  date: 2021-10-29_05-26-56
  done: false
  episode_len_mean: 270.41
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.041000000000114
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 4
  episodes_total: 3767
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2552480882647208
          cur_lr: 5.000000000000001e-05
          entropy: 0.8668310168716643
          entropy_coeff: 0.009999999999999998
          kl: 0.018071128196972
          policy_loss: 0.030925398568312328
          total_loss: 1.2085298405753242
          vf_explained_var: 0.268023818731308
          vf_loss: 1.1816601362493304
    num_agent_steps_sampled: 1122000
    num_agent_steps_trained: 1122000
    num_steps_sampled: 1122000
    num_steps_trained: 1122000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1122,30032.2,1122000,-27.041,-21.5,-34.5,270.41


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1123000
  custom_metrics: {}
  date: 2021-10-29_05-27-22
  done: false
  episode_len_mean: 269.52
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -26.952000000000112
  episode_reward_min: -34.50000000000022
  episodes_this_iter: 3
  episodes_total: 3770
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2552480882647208
          cur_lr: 5.000000000000001e-05
          entropy: 0.5437009940544765
          entropy_coeff: 0.009999999999999998
          kl: 0.014880576444206648
          policy_loss: -0.022387436860137517
          total_loss: 1.007070138057073
          vf_explained_var: -0.019745856523513794
          vf_loss: 1.0310963521401086
    num_agent_steps_sampled: 1123000
    num_agent_steps_trained: 1123000
    num_steps_sampled: 1123000
    num_steps_trained: 11230

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1123,30058.2,1123000,-26.952,-21.5,-34.5,269.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1124000
  custom_metrics: {}
  date: 2021-10-29_05-27-46
  done: false
  episode_len_mean: 271.03
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.103000000000115
  episode_reward_min: -42.90000000000034
  episodes_this_iter: 3
  episodes_total: 3773
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2552480882647208
          cur_lr: 5.000000000000001e-05
          entropy: 0.9181776056687038
          entropy_coeff: 0.009999999999999998
          kl: 0.018556811328138186
          policy_loss: -0.08091992967658573
          total_loss: 0.7547415775557359
          vf_explained_var: 0.41075265407562256
          vf_loss: 0.840106694234742
    num_agent_steps_sampled: 1124000
    num_agent_steps_trained: 1124000
    num_steps_sampled: 1124000
    num_steps_trained: 1124000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1124,30082.2,1124000,-27.103,-21.5,-42.9,271.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1125000
  custom_metrics: {}
  date: 2021-10-29_05-28-07
  done: false
  episode_len_mean: 273.55
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.355000000000114
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 3
  episodes_total: 3776
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2552480882647208
          cur_lr: 5.000000000000001e-05
          entropy: 0.6628360059526232
          entropy_coeff: 0.009999999999999998
          kl: 0.015144547585210293
          policy_loss: 0.012013724446296692
          total_loss: 0.7500671807262632
          vf_explained_var: 0.11713326722383499
          vf_loss: 0.7408161973787679
    num_agent_steps_sampled: 1125000
    num_agent_steps_trained: 1125000
    num_steps_sampled: 1125000
    num_steps_trained: 112500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1125,30102.4,1125000,-27.355,-21.5,-51.7,273.55




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1126000
  custom_metrics: {}
  date: 2021-10-29_05-28-50
  done: false
  episode_len_mean: 274.21
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.42100000000012
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3780
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2552480882647208
          cur_lr: 5.000000000000001e-05
          entropy: 0.9495004028081894
          entropy_coeff: 0.009999999999999998
          kl: 0.0115224774678383
          policy_loss: 0.026694999635219575
          total_loss: 1.076880333158705
          vf_explained_var: 0.3915095031261444
          vf_loss: 1.0567392461829714
    num_agent_steps_sampled: 1126000
    num_agent_steps_trained: 1126000
    num_steps_sampled: 1126000
    num_steps_trained: 1126000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1126,30145.9,1126000,-27.421,-21.1,-51.7,274.21


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1127000
  custom_metrics: {}
  date: 2021-10-29_05-29-18
  done: false
  episode_len_mean: 274.11
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.41100000000012
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 3
  episodes_total: 3783
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2552480882647208
          cur_lr: 5.000000000000001e-05
          entropy: 0.6408655749426948
          entropy_coeff: 0.009999999999999998
          kl: 0.004210366457579716
          policy_loss: -0.0749121884504954
          total_loss: 0.9629860848188401
          vf_explained_var: 0.617144763469696
          vf_loss: 1.0432322484751542
    num_agent_steps_sampled: 1127000
    num_agent_steps_trained: 1127000
    num_steps_sampled: 1127000
    num_steps_trained: 1127000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1127,30173.4,1127000,-27.411,-21.1,-51.7,274.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1128000
  custom_metrics: {}
  date: 2021-10-29_05-29-45
  done: false
  episode_len_mean: 274.11
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.41100000000012
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3787
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1276240441323604
          cur_lr: 5.000000000000001e-05
          entropy: 0.7271842526064979
          entropy_coeff: 0.009999999999999998
          kl: 0.016063174113547043
          policy_loss: -0.03165252680579821
          total_loss: 1.0703065905306075
          vf_explained_var: 0.5313066244125366
          vf_loss: 1.1071809225612217
    num_agent_steps_sampled: 1128000
    num_agent_steps_trained: 1128000
    num_steps_sampled: 1128000
    num_steps_trained: 1128000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1128,30201.1,1128000,-27.411,-21.1,-51.7,274.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1129000
  custom_metrics: {}
  date: 2021-10-29_05-30-10
  done: false
  episode_len_mean: 273.81
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.381000000000114
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3791
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1276240441323604
          cur_lr: 5.000000000000001e-05
          entropy: 0.7596332308318879
          entropy_coeff: 0.009999999999999998
          kl: 0.029604484954929452
          policy_loss: 0.04965181897083918
          total_loss: 0.8794916106594933
          vf_explained_var: 0.6226762533187866
          vf_loss: 0.8336578842666414
    num_agent_steps_sampled: 1129000
    num_agent_steps_trained: 1129000
    num_steps_sampled: 1129000
    num_steps_trained: 1129000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1129,30225.4,1129000,-27.381,-21.1,-51.7,273.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1130000
  custom_metrics: {}
  date: 2021-10-29_05-30-37
  done: false
  episode_len_mean: 274.38
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.43800000000012
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 3
  episodes_total: 3794
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19143606619854053
          cur_lr: 5.000000000000001e-05
          entropy: 1.1124920745690663
          entropy_coeff: 0.009999999999999998
          kl: 0.011030001146215796
          policy_loss: 0.02307727684577306
          total_loss: 0.4722336596912808
          vf_explained_var: 0.759286642074585
          vf_loss: 0.45816977073748905
    num_agent_steps_sampled: 1130000
    num_agent_steps_trained: 1130000
    num_steps_sampled: 1130000
    num_steps_trained: 1130000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1130,30252.1,1130000,-27.438,-21.1,-51.7,274.38


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1131000
  custom_metrics: {}
  date: 2021-10-29_05-31-04
  done: false
  episode_len_mean: 274.42
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.44200000000012
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3798
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19143606619854053
          cur_lr: 5.000000000000001e-05
          entropy: 0.6426956686708662
          entropy_coeff: 0.009999999999999998
          kl: 0.003512758435174214
          policy_loss: 0.03418372687366274
          total_loss: 0.9472678383191426
          vf_explained_var: 0.5444052219390869
          vf_loss: 0.9188385936948988
    num_agent_steps_sampled: 1131000
    num_agent_steps_trained: 1131000
    num_steps_sampled: 1131000
    num_steps_trained: 1131000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1131,30279.5,1131000,-27.442,-21.1,-51.7,274.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1132000
  custom_metrics: {}
  date: 2021-10-29_05-31-32
  done: false
  episode_len_mean: 274.27
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.427000000000117
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3802
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09571803309927027
          cur_lr: 5.000000000000001e-05
          entropy: 0.7395527842972014
          entropy_coeff: 0.009999999999999998
          kl: 0.04827754551458005
          policy_loss: 0.030884369793865416
          total_loss: 1.2143744428952534
          vf_explained_var: 0.3625450134277344
          vf_loss: 1.1862645672427283
    num_agent_steps_sampled: 1132000
    num_agent_steps_trained: 1132000
    num_steps_sampled: 1132000
    num_steps_trained: 1132000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1132,30307.1,1132000,-27.427,-21.1,-51.7,274.27


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1133000
  custom_metrics: {}
  date: 2021-10-29_05-32-02
  done: false
  episode_len_mean: 273.7
  episode_media: {}
  episode_reward_max: -21.10000000000003
  episode_reward_mean: -27.37000000000013
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3806
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14357704964890544
          cur_lr: 5.000000000000001e-05
          entropy: 0.20522604650921292
          entropy_coeff: 0.009999999999999998
          kl: 0.0033001018020620953
          policy_loss: 0.02395323585304949
          total_loss: 1.5193548931015863
          vf_explained_var: 0.1157919242978096
          vf_loss: 1.4969800988833109
    num_agent_steps_sampled: 1133000
    num_agent_steps_trained: 1133000
    num_steps_sampled: 1133000
    num_steps_trained: 1133000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1133,30337.4,1133000,-27.37,-21.1,-51.7,273.7




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1134000
  custom_metrics: {}
  date: 2021-10-29_05-32-47
  done: false
  episode_len_mean: 273.6
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -27.360000000000124
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3810
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07178852482445272
          cur_lr: 5.000000000000001e-05
          entropy: 0.8022771500878864
          entropy_coeff: 0.009999999999999998
          kl: 0.020802090095336387
          policy_loss: 0.09034408231576284
          total_loss: 1.0529742591910891
          vf_explained_var: 0.5095648765563965
          vf_loss: 0.9691595845752292
    num_agent_steps_sampled: 1134000
    num_agent_steps_trained: 1134000
    num_steps_sampled: 1134000
    num_steps_trained: 1134000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1134,30382.2,1134000,-27.36,-20.9,-51.7,273.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1135000
  custom_metrics: {}
  date: 2021-10-29_05-33-18
  done: false
  episode_len_mean: 271.82
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -27.182000000000116
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3814
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10768278723667905
          cur_lr: 5.000000000000001e-05
          entropy: 0.5745913396279018
          entropy_coeff: 0.009999999999999998
          kl: 0.06293456315782638
          policy_loss: 0.0281367723726564
          total_loss: 1.0920228603813382
          vf_explained_var: 0.44688522815704346
          vf_loss: 1.062855033079783
    num_agent_steps_sampled: 1135000
    num_agent_steps_trained: 1135000
    num_steps_sampled: 1135000
    num_steps_trained: 1135000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1135,30413.7,1135000,-27.182,-20.9,-51.7,271.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1136000
  custom_metrics: {}
  date: 2021-10-29_05-33-48
  done: false
  episode_len_mean: 269.44
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -26.944000000000116
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3818
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16152418085501852
          cur_lr: 5.000000000000001e-05
          entropy: 0.6102695430318514
          entropy_coeff: 0.009999999999999998
          kl: 0.003529493442965285
          policy_loss: 0.02078770407372051
          total_loss: 1.1642926103538938
          vf_explained_var: 0.37635836005210876
          vf_loss: 1.1490374823411307
    num_agent_steps_sampled: 1136000
    num_agent_steps_trained: 1136000
    num_steps_sampled: 1136000
    num_steps_trained: 113600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1136,30443.9,1136000,-26.944,-20.9,-51.7,269.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1137000
  custom_metrics: {}
  date: 2021-10-29_05-34-17
  done: false
  episode_len_mean: 269.86
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -26.986000000000114
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3822
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08076209042750926
          cur_lr: 5.000000000000001e-05
          entropy: 0.9330375697877672
          entropy_coeff: 0.009999999999999998
          kl: 0.031096330535596312
          policy_loss: -0.004925883975293901
          total_loss: 1.2339578184816573
          vf_explained_var: 0.4269798994064331
          vf_loss: 1.245702681938807
    num_agent_steps_sampled: 1137000
    num_agent_steps_trained: 1137000
    num_steps_sampled: 1137000
    num_steps_trained: 113700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1137,30472.9,1137000,-26.986,-20.9,-51.7,269.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1138000
  custom_metrics: {}
  date: 2021-10-29_05-34-48
  done: false
  episode_len_mean: 267.97
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -26.797000000000104
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 5
  episodes_total: 3827
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12114313564126393
          cur_lr: 5.000000000000001e-05
          entropy: 0.6406849806507429
          entropy_coeff: 0.009999999999999998
          kl: 0.03523456500443274
          policy_loss: -0.006555460145076116
          total_loss: 1.3437310642666287
          vf_explained_var: 0.18481700122356415
          vf_loss: 1.352424944109387
    num_agent_steps_sampled: 1138000
    num_agent_steps_trained: 1138000
    num_steps_sampled: 1138000
    num_steps_trained: 113800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1138,30503.1,1138000,-26.797,-20.9,-51.7,267.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1139000
  custom_metrics: {}
  date: 2021-10-29_05-35-17
  done: false
  episode_len_mean: 267.22
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -26.722000000000108
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3831
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18171470346189586
          cur_lr: 5.000000000000001e-05
          entropy: 0.6878773904509015
          entropy_coeff: 0.009999999999999998
          kl: 0.023862891567777093
          policy_loss: 0.06659080394440227
          total_loss: 1.0465064817004734
          vf_explained_var: 0.4039044678211212
          vf_loss: 0.9824582212501102
    num_agent_steps_sampled: 1139000
    num_agent_steps_trained: 1139000
    num_steps_sampled: 1139000
    num_steps_trained: 1139000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1139,30532.4,1139000,-26.722,-20.9,-51.7,267.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1140000
  custom_metrics: {}
  date: 2021-10-29_05-35-48
  done: false
  episode_len_mean: 264.41
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -26.44100000000011
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3835
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27257205519284383
          cur_lr: 5.000000000000001e-05
          entropy: 0.5642259680562549
          entropy_coeff: 0.009999999999999998
          kl: 0.004751594100884975
          policy_loss: 0.04034244600269529
          total_loss: 0.9461979515022702
          vf_explained_var: 0.2666768431663513
          vf_loss: 0.9102026161220339
    num_agent_steps_sampled: 1140000
    num_agent_steps_trained: 1140000
    num_steps_sampled: 1140000
    num_steps_trained: 1140000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1140,30563.4,1140000,-26.441,-20.9,-51.7,264.41


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1141000
  custom_metrics: {}
  date: 2021-10-29_05-36-20
  done: false
  episode_len_mean: 262.97
  episode_media: {}
  episode_reward_max: -20.900000000000027
  episode_reward_mean: -26.297000000000104
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3839
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13628602759642192
          cur_lr: 5.000000000000001e-05
          entropy: 0.4173687621951103
          entropy_coeff: 0.009999999999999998
          kl: 0.0034445127226717886
          policy_loss: 0.019643797973791757
          total_loss: 1.0145813233322567
          vf_explained_var: 0.31260207295417786
          vf_loss: 0.9986417657799191
    num_agent_steps_sampled: 1141000
    num_agent_steps_trained: 1141000
    num_steps_sampled: 1141000
    num_steps_trained: 1141

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1141,30595,1141000,-26.297,-20.9,-51.7,262.97




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1142000
  custom_metrics: {}
  date: 2021-10-29_05-37-06
  done: false
  episode_len_mean: 261.3
  episode_media: {}
  episode_reward_max: -20.000000000000014
  episode_reward_mean: -26.130000000000095
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3843
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06814301379821096
          cur_lr: 5.000000000000001e-05
          entropy: 0.5652915765841802
          entropy_coeff: 0.009999999999999998
          kl: 0.04192889261681027
          policy_loss: -0.09195203209916751
          total_loss: 1.3638264073265924
          vf_explained_var: 0.2548562288284302
          vf_loss: 1.4585741957028706
    num_agent_steps_sampled: 1142000
    num_agent_steps_trained: 1142000
    num_steps_sampled: 1142000
    num_steps_trained: 1142000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1142,30641.4,1142000,-26.13,-20,-51.7,261.3


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1143000
  custom_metrics: {}
  date: 2021-10-29_05-37-36
  done: false
  episode_len_mean: 260.29
  episode_media: {}
  episode_reward_max: -20.000000000000014
  episode_reward_mean: -26.029000000000106
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 5
  episodes_total: 3848
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10221452069731642
          cur_lr: 5.000000000000001e-05
          entropy: 0.7627175437079535
          entropy_coeff: 0.009999999999999998
          kl: 0.03351608911390337
          policy_loss: -0.022247586647669473
          total_loss: 1.0767295433415307
          vf_explained_var: 0.5025903582572937
          vf_loss: 1.1031784839100307
    num_agent_steps_sampled: 1143000
    num_agent_steps_trained: 1143000
    num_steps_sampled: 1143000
    num_steps_trained: 114300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1143,30671.7,1143000,-26.029,-20,-51.7,260.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1144000
  custom_metrics: {}
  date: 2021-10-29_05-38-07
  done: false
  episode_len_mean: 260.3
  episode_media: {}
  episode_reward_max: -20.000000000000014
  episode_reward_mean: -26.030000000000104
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3852
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15332178104597463
          cur_lr: 5.000000000000001e-05
          entropy: 0.659715034895473
          entropy_coeff: 0.009999999999999998
          kl: 0.004885393274672841
          policy_loss: 0.02447891616159015
          total_loss: 0.757293305794398
          vf_explained_var: 0.6260095238685608
          vf_loss: 0.7386624965402815
    num_agent_steps_sampled: 1144000
    num_agent_steps_trained: 1144000
    num_steps_sampled: 1144000
    num_steps_trained: 1144000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1144,30702.1,1144000,-26.03,-20,-51.7,260.3


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1145000
  custom_metrics: {}
  date: 2021-10-29_05-38-39
  done: false
  episode_len_mean: 258.87
  episode_media: {}
  episode_reward_max: -20.000000000000014
  episode_reward_mean: -25.8870000000001
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3856
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07666089052298732
          cur_lr: 5.000000000000001e-05
          entropy: 0.4914440484510528
          entropy_coeff: 0.009999999999999998
          kl: 0.00898943959483647
          policy_loss: 0.023318356772263845
          total_loss: 1.0242854091856215
          vf_explained_var: 0.42966699600219727
          vf_loss: 1.005192357632849
    num_agent_steps_sampled: 1145000
    num_agent_steps_trained: 1145000
    num_steps_sampled: 1145000
    num_steps_trained: 1145000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1145,30733.9,1145000,-25.887,-20,-51.7,258.87


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1146000
  custom_metrics: {}
  date: 2021-10-29_05-39-09
  done: false
  episode_len_mean: 258.12
  episode_media: {}
  episode_reward_max: -20.000000000000014
  episode_reward_mean: -25.812000000000094
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3860
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07666089052298732
          cur_lr: 5.000000000000001e-05
          entropy: 0.6643575292494562
          entropy_coeff: 0.009999999999999998
          kl: 0.022882245864793305
          policy_loss: -0.05523558821943071
          total_loss: 0.7146337628364563
          vf_explained_var: 0.45223358273506165
          vf_loss: 0.774758744902081
    num_agent_steps_sampled: 1146000
    num_agent_steps_trained: 1146000
    num_steps_sampled: 1146000
    num_steps_trained: 114600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1146,30764,1146000,-25.812,-20,-51.7,258.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1147000
  custom_metrics: {}
  date: 2021-10-29_05-39-36
  done: false
  episode_len_mean: 256.82
  episode_media: {}
  episode_reward_max: -20.000000000000014
  episode_reward_mean: -25.682000000000095
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3864
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11499133578448097
          cur_lr: 5.000000000000001e-05
          entropy: 0.9914043698045942
          entropy_coeff: 0.009999999999999998
          kl: 0.02348879922841544
          policy_loss: 0.034424439486530095
          total_loss: 0.7693240231937832
          vf_explained_var: 0.43684226274490356
          vf_loss: 0.7421126128070884
    num_agent_steps_sampled: 1147000
    num_agent_steps_trained: 1147000
    num_steps_sampled: 1147000
    num_steps_trained: 114700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1147,30791.3,1147000,-25.682,-20,-51.7,256.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1148000
  custom_metrics: {}
  date: 2021-10-29_05-40-06
  done: false
  episode_len_mean: 254.34
  episode_media: {}
  episode_reward_max: -20.000000000000014
  episode_reward_mean: -25.434000000000093
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 5
  episodes_total: 3869
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17248700367672146
          cur_lr: 5.000000000000001e-05
          entropy: 0.7042800645033519
          entropy_coeff: 0.009999999999999998
          kl: 0.01791078869144095
          policy_loss: -0.06133673944407039
          total_loss: 0.799697439538108
          vf_explained_var: 0.5387468338012695
          vf_loss: 0.86498760316107
    num_agent_steps_sampled: 1148000
    num_agent_steps_trained: 1148000
    num_steps_sampled: 1148000
    num_steps_trained: 1148000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1148,30821.6,1148000,-25.434,-20,-51.7,254.34




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1149000
  custom_metrics: {}
  date: 2021-10-29_05-40-55
  done: false
  episode_len_mean: 251.52
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -25.152000000000093
  episode_reward_min: -51.700000000000465
  episodes_this_iter: 4
  episodes_total: 3873
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17248700367672146
          cur_lr: 5.000000000000001e-05
          entropy: 0.7784876558515761
          entropy_coeff: 0.009999999999999998
          kl: 0.02246094959501161
          policy_loss: 0.006101353466510773
          total_loss: 0.9979003535376655
          vf_explained_var: 0.28756558895111084
          vf_loss: 0.995709662967258
    num_agent_steps_sampled: 1149000
    num_agent_steps_trained: 1149000
    num_steps_sampled: 1149000
    num_steps_trained: 1149000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1149,30870.3,1149000,-25.152,-19.9,-51.7,251.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1150000
  custom_metrics: {}
  date: 2021-10-29_05-41-24
  done: false
  episode_len_mean: 247.17
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.717000000000084
  episode_reward_min: -32.30000000000019
  episodes_this_iter: 4
  episodes_total: 3877
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2587305055150822
          cur_lr: 5.000000000000001e-05
          entropy: 0.6427956607606676
          entropy_coeff: 0.009999999999999998
          kl: 0.03194717193847292
          policy_loss: -0.014005386332670847
          total_loss: 0.6598162833187315
          vf_explained_var: 0.5480186343193054
          vf_loss: 0.6719839168919457
    num_agent_steps_sampled: 1150000
    num_agent_steps_trained: 1150000
    num_steps_sampled: 1150000
    num_steps_trained: 1150000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1150,30899.1,1150000,-24.717,-19.9,-32.3,247.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1151000
  custom_metrics: {}
  date: 2021-10-29_05-41-54
  done: false
  episode_len_mean: 245.86
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.586000000000084
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 3881
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3880957582726234
          cur_lr: 5.000000000000001e-05
          entropy: 0.7290101432138019
          entropy_coeff: 0.009999999999999998
          kl: 0.01534153556775798
          policy_loss: 0.06478782503141298
          total_loss: 0.8675925374031067
          vf_explained_var: 0.565616250038147
          vf_loss: 0.8041408307022518
    num_agent_steps_sampled: 1151000
    num_agent_steps_trained: 1151000
    num_steps_sampled: 1151000
    num_steps_trained: 1151000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1151,30929.5,1151000,-24.586,-19.9,-31.4,245.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1152000
  custom_metrics: {}
  date: 2021-10-29_05-42-23
  done: false
  episode_len_mean: 245.53
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.553000000000083
  episode_reward_min: -31.400000000000176
  episodes_this_iter: 4
  episodes_total: 3885
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3880957582726234
          cur_lr: 5.000000000000001e-05
          entropy: 0.6451862461037106
          entropy_coeff: 0.009999999999999998
          kl: 0.014768433119845051
          policy_loss: 0.03845851586924659
          total_loss: 0.8742168366909027
          vf_explained_var: 0.5328852534294128
          vf_loss: 0.8364786280526055
    num_agent_steps_sampled: 1152000
    num_agent_steps_trained: 1152000
    num_steps_sampled: 1152000
    num_steps_trained: 1152000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1152,30958.1,1152000,-24.553,-19.9,-31.4,245.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1153000
  custom_metrics: {}
  date: 2021-10-29_05-42-51
  done: false
  episode_len_mean: 245.47
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.547000000000075
  episode_reward_min: -32.000000000000185
  episodes_this_iter: 4
  episodes_total: 3889
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3880957582726234
          cur_lr: 5.000000000000001e-05
          entropy: 1.0408296876483494
          entropy_coeff: 0.009999999999999998
          kl: 0.010260558513311145
          policy_loss: 0.029340672824117873
          total_loss: 0.7984295970863766
          vf_explained_var: 0.4946923553943634
          vf_loss: 0.7755151338047451
    num_agent_steps_sampled: 1153000
    num_agent_steps_trained: 1153000
    num_steps_sampled: 1153000
    num_steps_trained: 1153000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1153,30986.3,1153000,-24.547,-19.9,-32,245.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1154000
  custom_metrics: {}
  date: 2021-10-29_05-43-19
  done: false
  episode_len_mean: 244.7
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.47000000000008
  episode_reward_min: -32.000000000000185
  episodes_this_iter: 4
  episodes_total: 3893
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3880957582726234
          cur_lr: 5.000000000000001e-05
          entropy: 1.0980674035019344
          entropy_coeff: 0.009999999999999998
          kl: 0.013306843350374227
          policy_loss: 0.03868003620041741
          total_loss: 0.5536271076235506
          vf_explained_var: 0.6069768071174622
          vf_loss: 0.5207634184095594
    num_agent_steps_sampled: 1154000
    num_agent_steps_trained: 1154000
    num_steps_sampled: 1154000
    num_steps_trained: 1154000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1154,31014.3,1154000,-24.47,-19.9,-32,244.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1155000
  custom_metrics: {}
  date: 2021-10-29_05-43-47
  done: false
  episode_len_mean: 243.81
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.381000000000075
  episode_reward_min: -32.000000000000185
  episodes_this_iter: 4
  episodes_total: 3897
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3880957582726234
          cur_lr: 5.000000000000001e-05
          entropy: 0.7451072325309117
          entropy_coeff: 0.009999999999999998
          kl: 0.008779550198310284
          policy_loss: 0.008361942734983233
          total_loss: 0.6004037194781833
          vf_explained_var: 0.5943359136581421
          vf_loss: 0.5960855407847299
    num_agent_steps_sampled: 1155000
    num_agent_steps_trained: 1155000
    num_steps_sampled: 1155000
    num_steps_trained: 1155000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1155,31042.4,1155000,-24.381,-19.9,-32,243.81




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1156000
  custom_metrics: {}
  date: 2021-10-29_05-44-33
  done: false
  episode_len_mean: 243.02
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.302000000000074
  episode_reward_min: -32.000000000000185
  episodes_this_iter: 4
  episodes_total: 3901
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3880957582726234
          cur_lr: 5.000000000000001e-05
          entropy: 1.0510591434107885
          entropy_coeff: 0.009999999999999998
          kl: 0.016111533920070857
          policy_loss: 0.020783504595359166
          total_loss: 1.3062388685014512
          vf_explained_var: 0.32114818692207336
          vf_loss: 1.289713121785058
    num_agent_steps_sampled: 1156000
    num_agent_steps_trained: 1156000
    num_steps_sampled: 1156000
    num_steps_trained: 1156000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1156,31087.7,1156000,-24.302,-19.9,-32,243.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1157000
  custom_metrics: {}
  date: 2021-10-29_05-45-02
  done: false
  episode_len_mean: 243.31
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.33100000000008
  episode_reward_min: -32.000000000000185
  episodes_this_iter: 4
  episodes_total: 3905
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3880957582726234
          cur_lr: 5.000000000000001e-05
          entropy: 0.640172317955229
          entropy_coeff: 0.009999999999999998
          kl: 0.010968076202400015
          policy_loss: 0.06872230105929905
          total_loss: 0.6880891727076637
          vf_explained_var: 0.7024187445640564
          vf_loss: 0.6215119302272797
    num_agent_steps_sampled: 1157000
    num_agent_steps_trained: 1157000
    num_steps_sampled: 1157000
    num_steps_trained: 1157000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1157,31117.2,1157000,-24.331,-19.9,-32,243.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1158000
  custom_metrics: {}
  date: 2021-10-29_05-45-32
  done: false
  episode_len_mean: 242.97
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.29700000000008
  episode_reward_min: -32.000000000000185
  episodes_this_iter: 4
  episodes_total: 3909
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3880957582726234
          cur_lr: 5.000000000000001e-05
          entropy: 0.6704291068845325
          entropy_coeff: 0.009999999999999998
          kl: 0.006592562891213068
          policy_loss: 0.006318644516997867
          total_loss: 0.8810588659511672
          vf_explained_var: 0.5161808729171753
          vf_loss: 0.8788859619034661
    num_agent_steps_sampled: 1158000
    num_agent_steps_trained: 1158000
    num_steps_sampled: 1158000
    num_steps_trained: 1158000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1158,31146.9,1158000,-24.297,-19.9,-32,242.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1159000
  custom_metrics: {}
  date: 2021-10-29_05-46-02
  done: false
  episode_len_mean: 242.55
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.255000000000077
  episode_reward_min: -32.000000000000185
  episodes_this_iter: 4
  episodes_total: 3913
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3880957582726234
          cur_lr: 5.000000000000001e-05
          entropy: 0.39031883544392054
          entropy_coeff: 0.009999999999999998
          kl: 0.0021666515463470166
          policy_loss: 0.06329489606950019
          total_loss: 1.0273434791300031
          vf_explained_var: 0.32023537158966064
          vf_loss: 0.9671108954482608
    num_agent_steps_sampled: 1159000
    num_agent_steps_trained: 1159000
    num_steps_sampled: 1159000
    num_steps_trained: 11590

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1159,31177.5,1159000,-24.255,-19.9,-32,242.55


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1160000
  custom_metrics: {}
  date: 2021-10-29_05-46-31
  done: false
  episode_len_mean: 243.55
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.35500000000008
  episode_reward_min: -32.60000000000019
  episodes_this_iter: 4
  episodes_total: 3917
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1940478791363117
          cur_lr: 5.000000000000001e-05
          entropy: 0.9999087240960863
          entropy_coeff: 0.009999999999999998
          kl: 0.04680672320456394
          policy_loss: -0.010772606978813807
          total_loss: 1.1343341132005056
          vf_explained_var: 0.4453059434890747
          vf_loss: 1.1460230661763084
    num_agent_steps_sampled: 1160000
    num_agent_steps_trained: 1160000
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1160,31205.6,1160000,-24.355,-19.9,-32.6,243.55


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1161000
  custom_metrics: {}
  date: 2021-10-29_05-47-00
  done: false
  episode_len_mean: 242.88
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.288000000000075
  episode_reward_min: -32.60000000000019
  episodes_this_iter: 4
  episodes_total: 3921
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2910718187044674
          cur_lr: 5.000000000000001e-05
          entropy: 0.7733481258153916
          entropy_coeff: 0.009999999999999998
          kl: 0.007528696924564768
          policy_loss: 0.031952120115359625
          total_loss: 0.6815570169024997
          vf_explained_var: 0.6632463335990906
          vf_loss: 0.6551469951868057
    num_agent_steps_sampled: 1161000
    num_agent_steps_trained: 1161000
    num_steps_sampled: 1161000
    num_steps_trained: 1161000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1161,31235.3,1161000,-24.288,-19.9,-32.6,242.88


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1162000
  custom_metrics: {}
  date: 2021-10-29_05-47-29
  done: false
  episode_len_mean: 243.73
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.37300000000008
  episode_reward_min: -32.60000000000019
  episodes_this_iter: 4
  episodes_total: 3925
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2910718187044674
          cur_lr: 5.000000000000001e-05
          entropy: 0.8017810907628801
          entropy_coeff: 0.009999999999999998
          kl: 0.011013222455381605
          policy_loss: 0.04751235751642121
          total_loss: 0.5276312366127968
          vf_explained_var: 0.8432275652885437
          vf_loss: 0.48493105669816333
    num_agent_steps_sampled: 1162000
    num_agent_steps_trained: 1162000
    num_steps_sampled: 1162000
    num_steps_trained: 1162000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1162,31264.1,1162000,-24.373,-19.9,-32.6,243.73


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1163000
  custom_metrics: {}
  date: 2021-10-29_05-47-59
  done: false
  episode_len_mean: 244.05
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.405000000000076
  episode_reward_min: -32.60000000000019
  episodes_this_iter: 4
  episodes_total: 3929
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2910718187044674
          cur_lr: 5.000000000000001e-05
          entropy: 0.4501331839296553
          entropy_coeff: 0.009999999999999998
          kl: 0.01159138872834286
          policy_loss: 0.05528497000535329
          total_loss: 0.6561491486099031
          vf_explained_var: 0.7677473425865173
          vf_loss: 0.601991586221589
    num_agent_steps_sampled: 1163000
    num_agent_steps_trained: 1163000
    num_steps_sampled: 1163000
    num_steps_trained: 1163000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1163,31294.3,1163000,-24.405,-19.9,-32.6,244.05




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1164000
  custom_metrics: {}
  date: 2021-10-29_05-48-45
  done: false
  episode_len_mean: 243.64
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.36400000000008
  episode_reward_min: -32.60000000000019
  episodes_this_iter: 4
  episodes_total: 3933
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2910718187044674
          cur_lr: 5.000000000000001e-05
          entropy: 0.4572355157799191
          entropy_coeff: 0.009999999999999998
          kl: 0.01278917852755386
          policy_loss: -0.02966031034787496
          total_loss: 1.0010801686180963
          vf_explained_var: 0.39484262466430664
          vf_loss: 1.0315902564260695
    num_agent_steps_sampled: 1164000
    num_agent_steps_trained: 1164000
    num_steps_sampled: 1164000
    num_steps_trained: 1164000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1164,31340,1164000,-24.364,-19.9,-32.6,243.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1165000
  custom_metrics: {}
  date: 2021-10-29_05-49-15
  done: false
  episode_len_mean: 244.12
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.41200000000008
  episode_reward_min: -32.60000000000019
  episodes_this_iter: 4
  episodes_total: 3937
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2910718187044674
          cur_lr: 5.000000000000001e-05
          entropy: 0.4024511680006981
          entropy_coeff: 0.009999999999999998
          kl: 0.004280864764132354
          policy_loss: -0.06016065780487326
          total_loss: 0.9629486474725936
          vf_explained_var: 0.4591737389564514
          vf_loss: 1.025887777407964
    num_agent_steps_sampled: 1165000
    num_agent_steps_trained: 1165000
    num_steps_sampled: 1165000
    num_steps_trained: 1165000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1165,31369.6,1165000,-24.412,-19.9,-32.6,244.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1166000
  custom_metrics: {}
  date: 2021-10-29_05-49-45
  done: false
  episode_len_mean: 244.47
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.44700000000008
  episode_reward_min: -32.60000000000019
  episodes_this_iter: 5
  episodes_total: 3942
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1455359093522337
          cur_lr: 5.000000000000001e-05
          entropy: 0.2935917114218076
          entropy_coeff: 0.009999999999999998
          kl: 0.00581898100455002
          policy_loss: -0.08679245797296366
          total_loss: 1.0566340850459204
          vf_explained_var: 0.4897229075431824
          vf_loss: 1.1455155975288815
    num_agent_steps_sampled: 1166000
    num_agent_steps_trained: 1166000
    num_steps_sampled: 1166000
    num_steps_trained: 1166000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1166,31399.8,1166000,-24.447,-19.9,-32.6,244.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1167000
  custom_metrics: {}
  date: 2021-10-29_05-50-13
  done: false
  episode_len_mean: 244.75
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.475000000000083
  episode_reward_min: -32.60000000000019
  episodes_this_iter: 4
  episodes_total: 3946
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1455359093522337
          cur_lr: 5.000000000000001e-05
          entropy: 0.2586919546127319
          entropy_coeff: 0.009999999999999998
          kl: 0.0017949312799990717
          policy_loss: 0.004140597167942259
          total_loss: 0.8950441433323755
          vf_explained_var: 0.3953721821308136
          vf_loss: 0.8932292441527049
    num_agent_steps_sampled: 1167000
    num_agent_steps_trained: 1167000
    num_steps_sampled: 1167000
    num_steps_trained: 1167000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1167,31428.3,1167000,-24.475,-19.9,-32.6,244.75


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1168000
  custom_metrics: {}
  date: 2021-10-29_05-50-44
  done: false
  episode_len_mean: 244.89
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.48900000000008
  episode_reward_min: -32.60000000000019
  episodes_this_iter: 4
  episodes_total: 3950
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07276795467611685
          cur_lr: 5.000000000000001e-05
          entropy: 0.47648051049974227
          entropy_coeff: 0.009999999999999998
          kl: 0.044550497250460015
          policy_loss: -0.010773646334807077
          total_loss: 0.8363462312353982
          vf_explained_var: 0.46045657992362976
          vf_loss: 0.84864282310009
    num_agent_steps_sampled: 1168000
    num_agent_steps_trained: 1168000
    num_steps_sampled: 1168000
    num_steps_trained: 1168000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1168,31458.5,1168000,-24.489,-19.9,-32.6,244.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1169000
  custom_metrics: {}
  date: 2021-10-29_05-51-14
  done: false
  episode_len_mean: 245.3
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.530000000000072
  episode_reward_min: -32.60000000000019
  episodes_this_iter: 4
  episodes_total: 3954
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1091519320141753
          cur_lr: 5.000000000000001e-05
          entropy: 0.49007525973849825
          entropy_coeff: 0.009999999999999998
          kl: 0.007914960173745081
          policy_loss: -0.0055115413334634565
          total_loss: 0.8911700487136841
          vf_explained_var: 0.44498443603515625
          vf_loss: 0.9007184041870965
    num_agent_steps_sampled: 1169000
    num_agent_steps_trained: 1169000
    num_steps_sampled: 1169000
    num_steps_trained: 11690

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1169,31488.6,1169000,-24.53,-19.9,-32.6,245.3


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1170000
  custom_metrics: {}
  date: 2021-10-29_05-51-44
  done: false
  episode_len_mean: 245.26
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.52600000000008
  episode_reward_min: -32.60000000000019
  episodes_this_iter: 4
  episodes_total: 3958
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1091519320141753
          cur_lr: 5.000000000000001e-05
          entropy: 0.46705269432730145
          entropy_coeff: 0.009999999999999998
          kl: 0.009939642948533963
          policy_loss: 0.01528488223751386
          total_loss: 0.8753060665395525
          vf_explained_var: 0.591762363910675
          vf_loss: 0.863606780105167
    num_agent_steps_sampled: 1170000
    num_agent_steps_trained: 1170000
    num_steps_sampled: 1170000
    num_steps_trained: 1170000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1170,31518.5,1170000,-24.526,-19.9,-32.6,245.26




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1171000
  custom_metrics: {}
  date: 2021-10-29_05-52-26
  done: false
  episode_len_mean: 247.53
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.753000000000085
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 3961
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1091519320141753
          cur_lr: 5.000000000000001e-05
          entropy: 1.2540046645535363
          entropy_coeff: 0.009999999999999998
          kl: 0.023062302714388257
          policy_loss: -0.08214766018920475
          total_loss: 1.1485879802041583
          vf_explained_var: -0.06531362235546112
          vf_loss: 1.2407583905590904
    num_agent_steps_sampled: 1171000
    num_agent_steps_trained: 1171000
    num_steps_sampled: 1171000
    num_steps_trained: 117100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1171,31560.3,1171000,-24.753,-19.9,-43.1,247.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1172000
  custom_metrics: {}
  date: 2021-10-29_05-52-54
  done: false
  episode_len_mean: 247.92
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.79200000000008
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 3965
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16372789802126292
          cur_lr: 5.000000000000001e-05
          entropy: 0.730076195879115
          entropy_coeff: 0.009999999999999998
          kl: 0.02439553302747844
          policy_loss: 0.01310621624191602
          total_loss: 1.1204304483201768
          vf_explained_var: 0.38232389092445374
          vf_loss: 1.1106307513184017
    num_agent_steps_sampled: 1172000
    num_agent_steps_trained: 1172000
    num_steps_sampled: 1172000
    num_steps_trained: 1172000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1172,31588.9,1172000,-24.792,-19.9,-43.1,247.92


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1173000
  custom_metrics: {}
  date: 2021-10-29_05-53-19
  done: false
  episode_len_mean: 249.43
  episode_media: {}
  episode_reward_max: -19.900000000000013
  episode_reward_mean: -24.943000000000083
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 3969
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24559184703189443
          cur_lr: 5.000000000000001e-05
          entropy: 1.07991953558392
          entropy_coeff: 0.009999999999999998
          kl: 0.019431878736575925
          policy_loss: 0.016718906495306225
          total_loss: 1.0138422820303175
          vf_explained_var: 0.419301301240921
          vf_loss: 1.0031502650843727
    num_agent_steps_sampled: 1173000
    num_agent_steps_trained: 1173000
    num_steps_sampled: 1173000
    num_steps_trained: 1173000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1173,31613.5,1173000,-24.943,-19.9,-43.1,249.43


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1174000
  custom_metrics: {}
  date: 2021-10-29_05-53-45
  done: false
  episode_len_mean: 250.31
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.03100000000009
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 3972
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24559184703189443
          cur_lr: 5.000000000000001e-05
          entropy: 0.7559495396084256
          entropy_coeff: 0.009999999999999998
          kl: 0.01659601319367978
          policy_loss: -0.09365971982479096
          total_loss: 0.6805761653516028
          vf_explained_var: 0.5484848022460938
          vf_loss: 0.7777195311254925
    num_agent_steps_sampled: 1174000
    num_agent_steps_trained: 1174000
    num_steps_sampled: 1174000
    num_steps_trained: 1174000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1174,31640,1174000,-25.031,-20.5,-43.1,250.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1175000
  custom_metrics: {}
  date: 2021-10-29_05-54-16
  done: false
  episode_len_mean: 250.49
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.04900000000009
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 5
  episodes_total: 3977
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24559184703189443
          cur_lr: 5.000000000000001e-05
          entropy: 0.5197933206955592
          entropy_coeff: 0.009999999999999998
          kl: 0.0450670900499646
          policy_loss: 0.012746878837545713
          total_loss: 0.8627073917124006
          vf_explained_var: 0.6634740829467773
          vf_loss: 0.8440903461641736
    num_agent_steps_sampled: 1175000
    num_agent_steps_trained: 1175000
    num_steps_sampled: 1175000
    num_steps_trained: 1175000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1175,31670.7,1175000,-25.049,-20.5,-43.1,250.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1176000
  custom_metrics: {}
  date: 2021-10-29_05-54-47
  done: false
  episode_len_mean: 250.28
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.028000000000088
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 3981
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.36838777054784166
          cur_lr: 5.000000000000001e-05
          entropy: 0.7184622324175305
          entropy_coeff: 0.009999999999999998
          kl: 0.010808759798000268
          policy_loss: 0.030617648363113405
          total_loss: 0.45304499748680327
          vf_explained_var: 0.8530928492546082
          vf_loss: 0.4256301522254944
    num_agent_steps_sampled: 1176000
    num_agent_steps_trained: 1176000
    num_steps_sampled: 1176000
    num_steps_trained: 1176000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1176,31702,1176000,-25.028,-20.5,-43.1,250.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1177000
  custom_metrics: {}
  date: 2021-10-29_05-55-17
  done: false
  episode_len_mean: 250.32
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -25.032000000000085
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 3985
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.36838777054784166
          cur_lr: 5.000000000000001e-05
          entropy: 0.6500662651326922
          entropy_coeff: 0.009999999999999998
          kl: 0.006094236764239448
          policy_loss: 0.047934311918086475
          total_loss: 0.8808796776665582
          vf_explained_var: 0.494581937789917
          vf_loss: 0.8372009913126628
    num_agent_steps_sampled: 1177000
    num_agent_steps_trained: 1177000
    num_steps_sampled: 1177000
    num_steps_trained: 1177000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1177,31731.4,1177000,-25.032,-20.5,-43.1,250.32




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1178000
  custom_metrics: {}
  date: 2021-10-29_05-56-06
  done: false
  episode_len_mean: 248.29
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.829000000000082
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 5
  episodes_total: 3990
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.36838777054784166
          cur_lr: 5.000000000000001e-05
          entropy: 0.2902620399991671
          entropy_coeff: 0.009999999999999998
          kl: 0.003337168901609486
          policy_loss: 0.02353338532977634
          total_loss: 0.8791400134563446
          vf_explained_var: 0.6597132682800293
          vf_loss: 0.8572798675960964
    num_agent_steps_sampled: 1178000
    num_agent_steps_trained: 1178000
    num_steps_sampled: 1178000
    num_steps_trained: 1178000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1178,31780.4,1178000,-24.829,-20.5,-43.1,248.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1179000
  custom_metrics: {}
  date: 2021-10-29_05-56-34
  done: false
  episode_len_mean: 248.39
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.839000000000084
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 3993
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 0.3890910026099947
          entropy_coeff: 0.009999999999999998
          kl: 0.007038704707277235
          policy_loss: 0.06370882640282313
          total_loss: 0.7646606690353818
          vf_explained_var: 0.41572892665863037
          vf_loss: 0.7035462610423565
    num_agent_steps_sampled: 1179000
    num_agent_steps_trained: 1179000
    num_steps_sampled: 1179000
    num_steps_trained: 1179000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1179,31808.9,1179000,-24.839,-20.5,-43.1,248.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1180000
  custom_metrics: {}
  date: 2021-10-29_05-57-05
  done: false
  episode_len_mean: 247.65
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.765000000000082
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 5
  episodes_total: 3998
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 0.3406694336069955
          entropy_coeff: 0.009999999999999998
          kl: 0.005706918160593154
          policy_loss: -0.09244058488143815
          total_loss: 0.9073102355003357
          vf_explained_var: 0.49130359292030334
          vf_loss: 1.002106331454383
    num_agent_steps_sampled: 1180000
    num_agent_steps_trained: 1180000
    num_steps_sampled: 1180000
    num_steps_trained: 1180000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1180,31839.4,1180000,-24.765,-20.5,-43.1,247.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1181000
  custom_metrics: {}
  date: 2021-10-29_05-57-36
  done: false
  episode_len_mean: 246.81
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.681000000000072
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 4002
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 0.7924303246868981
          entropy_coeff: 0.009999999999999998
          kl: 0.010957474882299796
          policy_loss: 0.05578402280807495
          total_loss: 0.7856482817067041
          vf_explained_var: 0.6751810312271118
          vf_loss: 0.7357702612876892
    num_agent_steps_sampled: 1181000
    num_agent_steps_trained: 1181000
    num_steps_sampled: 1181000
    num_steps_trained: 1181000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1181,31870.2,1181000,-24.681,-20.5,-43.1,246.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1182000
  custom_metrics: {}
  date: 2021-10-29_05-58-06
  done: false
  episode_len_mean: 246.25
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.62500000000008
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 4006
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 0.5658903542492125
          entropy_coeff: 0.009999999999999998
          kl: 0.005304329294458417
          policy_loss: -0.025646057890521157
          total_loss: 0.4895995477835337
          vf_explained_var: 0.7549564838409424
          vf_loss: 0.5199274844593472
    num_agent_steps_sampled: 1182000
    num_agent_steps_trained: 1182000
    num_steps_sampled: 1182000
    num_steps_trained: 1182000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1182,31900.1,1182000,-24.625,-20.5,-43.1,246.25


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1183000
  custom_metrics: {}
  date: 2021-10-29_05-58-36
  done: false
  episode_len_mean: 245.86
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.586000000000084
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 4010
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18419388527392083
          cur_lr: 5.000000000000001e-05
          entropy: 0.5225820614231957
          entropy_coeff: 0.009999999999999998
          kl: 0.05226797635913398
          policy_loss: -0.014627802289194532
          total_loss: 0.9608790208896001
          vf_explained_var: 0.5651941895484924
          vf_loss: 0.971105201376809
    num_agent_steps_sampled: 1183000
    num_agent_steps_trained: 1183000
    num_steps_sampled: 1183000
    num_steps_trained: 1183000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1183,31930.7,1183000,-24.586,-20.5,-43.1,245.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1184000
  custom_metrics: {}
  date: 2021-10-29_05-59-07
  done: false
  episode_len_mean: 245.95
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.595000000000077
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 5
  episodes_total: 4015
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27629082791088116
          cur_lr: 5.000000000000001e-05
          entropy: 0.743951968020863
          entropy_coeff: 0.009999999999999998
          kl: 0.024770961109359904
          policy_loss: -0.03503565481967396
          total_loss: 1.1052397261063258
          vf_explained_var: 0.4526750445365906
          vf_loss: 1.1408709208170573
    num_agent_steps_sampled: 1184000
    num_agent_steps_trained: 1184000
    num_steps_sampled: 1184000
    num_steps_trained: 1184000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1184,31961.5,1184000,-24.595,-20.5,-43.1,245.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1185000
  custom_metrics: {}
  date: 2021-10-29_05-59-38
  done: false
  episode_len_mean: 244.27
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.42700000000008
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 4019
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4144362418663218
          cur_lr: 5.000000000000001e-05
          entropy: 0.47102302743328944
          entropy_coeff: 0.009999999999999998
          kl: 0.006413345996787711
          policy_loss: 0.06026201438572672
          total_loss: 0.6042439050144619
          vf_explained_var: 0.7267324924468994
          vf_loss: 0.5460341960191727
    num_agent_steps_sampled: 1185000
    num_agent_steps_trained: 1185000
    num_steps_sampled: 1185000
    num_steps_trained: 1185000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1185,31992.7,1185000,-24.427,-20.5,-43.1,244.27




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1186000
  custom_metrics: {}
  date: 2021-10-29_06-00-27
  done: false
  episode_len_mean: 243.74
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.374000000000073
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 4023
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4144362418663218
          cur_lr: 5.000000000000001e-05
          entropy: 0.49341223074330226
          entropy_coeff: 0.009999999999999998
          kl: 0.006614194494924183
          policy_loss: -0.0672346391611629
          total_loss: 0.4407048308187061
          vf_explained_var: 0.7282330989837646
          vf_loss: 0.5101324326462215
    num_agent_steps_sampled: 1186000
    num_agent_steps_trained: 1186000
    num_steps_sampled: 1186000
    num_steps_trained: 1186000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1186,32041.6,1186000,-24.374,-20.5,-43.1,243.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1187000
  custom_metrics: {}
  date: 2021-10-29_06-00-59
  done: false
  episode_len_mean: 243.22
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.322000000000074
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 5
  episodes_total: 4028
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4144362418663218
          cur_lr: 5.000000000000001e-05
          entropy: 0.7919851793183221
          entropy_coeff: 0.009999999999999998
          kl: 0.006056889355043735
          policy_loss: -0.013203783416085774
          total_loss: 0.7124152210023668
          vf_explained_var: 0.6336554884910583
          vf_loss: 0.7310286581516265
    num_agent_steps_sampled: 1187000
    num_agent_steps_trained: 1187000
    num_steps_sampled: 1187000
    num_steps_trained: 1187000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1187,32072.9,1187000,-24.322,-20.5,-43.1,243.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1188000
  custom_metrics: {}
  date: 2021-10-29_06-01-31
  done: false
  episode_len_mean: 242.97
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.29700000000008
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 4032
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4144362418663218
          cur_lr: 5.000000000000001e-05
          entropy: 0.4707288776834806
          entropy_coeff: 0.009999999999999998
          kl: 0.006787631872439053
          policy_loss: 0.02767601079410977
          total_loss: 0.7475535571575165
          vf_explained_var: 0.5247778296470642
          vf_loss: 0.7217717968755298
    num_agent_steps_sampled: 1188000
    num_agent_steps_trained: 1188000
    num_steps_sampled: 1188000
    num_steps_trained: 1188000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1188,32105,1188000,-24.297,-20.5,-43.1,242.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1189000
  custom_metrics: {}
  date: 2021-10-29_06-02-03
  done: false
  episode_len_mean: 242.02
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.202000000000076
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 5
  episodes_total: 4037
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4144362418663218
          cur_lr: 5.000000000000001e-05
          entropy: 0.45281652477052475
          entropy_coeff: 0.009999999999999998
          kl: 0.002943885522147285
          policy_loss: -0.01543080508708954
          total_loss: 0.7729297512107425
          vf_explained_var: 0.5219462513923645
          vf_loss: 0.7916686687204573
    num_agent_steps_sampled: 1189000
    num_agent_steps_trained: 1189000
    num_steps_sampled: 1189000
    num_steps_trained: 1189000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1189,32137,1189000,-24.202,-20.5,-43.1,242.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1190000
  custom_metrics: {}
  date: 2021-10-29_06-02-28
  done: false
  episode_len_mean: 243.39
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.339000000000084
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 3
  episodes_total: 4040
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2072181209331609
          cur_lr: 5.000000000000001e-05
          entropy: 0.9518749102950096
          entropy_coeff: 0.009999999999999998
          kl: 0.038841150538664396
          policy_loss: 0.09979767898718515
          total_loss: 0.6307261698775821
          vf_explained_var: 0.46469658613204956
          vf_loss: 0.5323986411922508
    num_agent_steps_sampled: 1190000
    num_agent_steps_trained: 1190000
    num_steps_sampled: 1190000
    num_steps_trained: 1190000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1190,32162.6,1190000,-24.339,-20.5,-43.1,243.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1191000
  custom_metrics: {}
  date: 2021-10-29_06-02-58
  done: false
  episode_len_mean: 243.58
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.358000000000075
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 4044
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3108271813997413
          cur_lr: 5.000000000000001e-05
          entropy: 0.6419863830010096
          entropy_coeff: 0.009999999999999998
          kl: 0.004270005806659406
          policy_loss: -0.0005043794297509723
          total_loss: 0.9729656881756252
          vf_explained_var: 0.25474727153778076
          vf_loss: 0.9785626987616222
    num_agent_steps_sampled: 1191000
    num_agent_steps_trained: 1191000
    num_steps_sampled: 1191000
    num_steps_trained: 119100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1191,32191.8,1191000,-24.358,-20.5,-43.1,243.58


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1192000
  custom_metrics: {}
  date: 2021-10-29_06-03-29
  done: false
  episode_len_mean: 242.84
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.284000000000074
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 5
  episodes_total: 4049
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15541359069987065
          cur_lr: 5.000000000000001e-05
          entropy: 0.39285702258348465
          entropy_coeff: 0.009999999999999998
          kl: 0.005659695212554854
          policy_loss: -0.031659488214386834
          total_loss: 1.0093983170058993
          vf_explained_var: 0.41689854860305786
          vf_loss: 1.0441067765156429
    num_agent_steps_sampled: 1192000
    num_agent_steps_trained: 1192000
    num_steps_sampled: 1192000
    num_steps_trained: 11920

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1192,32222.8,1192000,-24.284,-20.5,-43.1,242.84




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1193000
  custom_metrics: {}
  date: 2021-10-29_06-04-17
  done: false
  episode_len_mean: 242.49
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.249000000000084
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 4053
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15541359069987065
          cur_lr: 5.000000000000001e-05
          entropy: 0.6782451689243316
          entropy_coeff: 0.009999999999999998
          kl: 0.010885257515427406
          policy_loss: 0.019357474976115757
          total_loss: 0.8165177192952898
          vf_explained_var: 0.5135557651519775
          vf_loss: 0.8022509849733777
    num_agent_steps_sampled: 1193000
    num_agent_steps_trained: 1193000
    num_steps_sampled: 1193000
    num_steps_trained: 1193000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1193,32271.3,1193000,-24.249,-20.5,-43.1,242.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1194000
  custom_metrics: {}
  date: 2021-10-29_06-04-48
  done: false
  episode_len_mean: 242.51
  episode_media: {}
  episode_reward_max: -20.50000000000002
  episode_reward_mean: -24.251000000000072
  episode_reward_min: -43.10000000000034
  episodes_this_iter: 4
  episodes_total: 4057
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15541359069987065
          cur_lr: 5.000000000000001e-05
          entropy: 0.5097845782836278
          entropy_coeff: 0.009999999999999998
          kl: 0.006313822733461746
          policy_loss: 0.009781965696149402
          total_loss: 0.6714920305543476
          vf_explained_var: 0.42778244614601135
          vf_loss: 0.6658266600635316
    num_agent_steps_sampled: 1194000
    num_agent_steps_trained: 1194000
    num_steps_sampled: 1194000
    num_steps_trained: 1194000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1194,32302.4,1194000,-24.251,-20.5,-43.1,242.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1195000
  custom_metrics: {}
  date: 2021-10-29_06-05-16
  done: false
  episode_len_mean: 241.62
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.16200000000007
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 4061
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15541359069987065
          cur_lr: 5.000000000000001e-05
          entropy: 0.9573594159550137
          entropy_coeff: 0.009999999999999998
          kl: 0.01339353351904609
          policy_loss: -0.022988257764114273
          total_loss: 0.952599722146988
          vf_explained_var: 0.3937453329563141
          vf_loss: 0.9830800460444556
    num_agent_steps_sampled: 1195000
    num_agent_steps_trained: 1195000
    num_steps_sampled: 1195000
    num_steps_trained: 1195000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1195,32330.1,1195000,-24.162,-21.3,-39,241.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1196000
  custom_metrics: {}
  date: 2021-10-29_06-05-47
  done: false
  episode_len_mean: 240.79
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -24.079000000000068
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 4065
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15541359069987065
          cur_lr: 5.000000000000001e-05
          entropy: 0.8231629762384627
          entropy_coeff: 0.009999999999999998
          kl: 0.016236519157790022
          policy_loss: 0.008361972288952933
          total_loss: 0.8537770807743073
          vf_explained_var: 0.43779391050338745
          vf_loss: 0.8511233621173435
    num_agent_steps_sampled: 1196000
    num_agent_steps_trained: 1196000
    num_steps_sampled: 1196000
    num_steps_trained: 11960

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1196,32360.9,1196000,-24.079,-21.3,-39,240.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1197000
  custom_metrics: {}
  date: 2021-10-29_06-06-18
  done: false
  episode_len_mean: 239.03
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -23.90300000000007
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 4069
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15541359069987065
          cur_lr: 5.000000000000001e-05
          entropy: 0.7002823438909319
          entropy_coeff: 0.009999999999999998
          kl: 0.02647028799547573
          policy_loss: -0.00492171479596032
          total_loss: 0.7709993425342772
          vf_explained_var: 0.6290401220321655
          vf_loss: 0.7788100318776237
    num_agent_steps_sampled: 1197000
    num_agent_steps_trained: 1197000
    num_steps_sampled: 1197000
    num_steps_trained: 1197000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1197,32392,1197000,-23.903,-21.3,-39,239.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1198000
  custom_metrics: {}
  date: 2021-10-29_06-06-50
  done: false
  episode_len_mean: 237.29
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -23.72900000000007
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 5
  episodes_total: 4074
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23312038604980592
          cur_lr: 5.000000000000001e-05
          entropy: 0.3079693549209171
          entropy_coeff: 0.009999999999999998
          kl: 0.010161243239400382
          policy_loss: -0.013268397086196475
          total_loss: 1.0191946380668215
          vf_explained_var: 0.5721603631973267
          vf_loss: 1.0331739438904657
    num_agent_steps_sampled: 1198000
    num_agent_steps_trained: 1198000
    num_steps_sampled: 1198000
    num_steps_trained: 119800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1198,32423.9,1198000,-23.729,-21.3,-39,237.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1199000
  custom_metrics: {}
  date: 2021-10-29_06-07-20
  done: false
  episode_len_mean: 237.07
  episode_media: {}
  episode_reward_max: -21.300000000000033
  episode_reward_mean: -23.70700000000006
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 4078
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23312038604980592
          cur_lr: 5.000000000000001e-05
          entropy: 0.37184539470407696
          entropy_coeff: 0.009999999999999998
          kl: 0.0021899917818345217
          policy_loss: 0.008553903549909592
          total_loss: 0.8805817464987437
          vf_explained_var: 0.44173040986061096
          vf_loss: 0.8752357608742184
    num_agent_steps_sampled: 1199000
    num_agent_steps_trained: 1199000
    num_steps_sampled: 1199000
    num_steps_trained: 1199

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1199,32454.1,1199000,-23.707,-21.3,-39,237.07




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1200000
  custom_metrics: {}
  date: 2021-10-29_06-08-08
  done: false
  episode_len_mean: 236.77
  episode_media: {}
  episode_reward_max: -20.200000000000017
  episode_reward_mean: -23.677000000000064
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 4082
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11656019302490296
          cur_lr: 5.000000000000001e-05
          entropy: 0.494307688706451
          entropy_coeff: 0.009999999999999998
          kl: 0.03429280479397363
          policy_loss: 0.0020494547569089466
          total_loss: 1.1681531760427686
          vf_explained_var: 0.34838759899139404
          vf_loss: 1.1670496311452654
    num_agent_steps_sampled: 1200000
    num_agent_steps_trained: 1200000
    num_steps_sampled: 1200000
    num_steps_trained: 120000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1200,32502.4,1200000,-23.677,-20.2,-39,236.77


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1201000
  custom_metrics: {}
  date: 2021-10-29_06-08-41
  done: false
  episode_len_mean: 236.59
  episode_media: {}
  episode_reward_max: -20.200000000000017
  episode_reward_mean: -23.65900000000006
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 5
  episodes_total: 4087
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17484028953735445
          cur_lr: 5.000000000000001e-05
          entropy: 0.47809465494420794
          entropy_coeff: 0.009999999999999998
          kl: 0.016196592406260556
          policy_loss: -0.014749035818709268
          total_loss: 1.1104188329643674
          vf_explained_var: 0.481458455324173
          vf_loss: 1.1271169980367024
    num_agent_steps_sampled: 1201000
    num_agent_steps_trained: 1201000
    num_steps_sampled: 1201000
    num_steps_trained: 120100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1201,32534.6,1201000,-23.659,-20.2,-39,236.59


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1202000
  custom_metrics: {}
  date: 2021-10-29_06-09-11
  done: false
  episode_len_mean: 236.04
  episode_media: {}
  episode_reward_max: -20.200000000000017
  episode_reward_mean: -23.604000000000063
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 4091
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17484028953735445
          cur_lr: 5.000000000000001e-05
          entropy: 0.7161750687493218
          entropy_coeff: 0.009999999999999998
          kl: 0.004962588301879123
          policy_loss: -0.03457953010996183
          total_loss: 1.0219432055950164
          vf_explained_var: 0.3983902931213379
          vf_loss: 1.0628168205420176
    num_agent_steps_sampled: 1202000
    num_agent_steps_trained: 1202000
    num_steps_sampled: 1202000
    num_steps_trained: 120200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1202,32565.2,1202000,-23.604,-20.2,-39,236.04


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1203000
  custom_metrics: {}
  date: 2021-10-29_06-09-43
  done: false
  episode_len_mean: 235.84
  episode_media: {}
  episode_reward_max: -20.200000000000017
  episode_reward_mean: -23.584000000000064
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 4095
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08742014476867722
          cur_lr: 5.000000000000001e-05
          entropy: 0.6483424991369248
          entropy_coeff: 0.009999999999999998
          kl: 0.016394073609950304
          policy_loss: -0.02235771690805753
          total_loss: 0.9135525385538737
          vf_explained_var: 0.2154388427734375
          vf_loss: 0.9409605051080386
    num_agent_steps_sampled: 1203000
    num_agent_steps_trained: 1203000
    num_steps_sampled: 1203000
    num_steps_trained: 120300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1203,32597.1,1203000,-23.584,-20.2,-39,235.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1204000
  custom_metrics: {}
  date: 2021-10-29_06-10-15
  done: false
  episode_len_mean: 235.28
  episode_media: {}
  episode_reward_max: -20.200000000000017
  episode_reward_mean: -23.528000000000063
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 5
  episodes_total: 4100
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08742014476867722
          cur_lr: 5.000000000000001e-05
          entropy: 0.3103182318309943
          entropy_coeff: 0.009999999999999998
          kl: 0.003648280020823375
          policy_loss: -0.04365406214363045
          total_loss: 1.0798377719190386
          vf_explained_var: 0.3658721148967743
          vf_loss: 1.1262760798136393
    num_agent_steps_sampled: 1204000
    num_agent_steps_trained: 1204000
    num_steps_sampled: 1204000
    num_steps_trained: 120400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1204,32628.9,1204000,-23.528,-20.2,-39,235.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1205000
  custom_metrics: {}
  date: 2021-10-29_06-10-47
  done: false
  episode_len_mean: 235.14
  episode_media: {}
  episode_reward_max: -20.200000000000017
  episode_reward_mean: -23.514000000000063
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 4104
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04371007238433861
          cur_lr: 5.000000000000001e-05
          entropy: 0.43252608527739844
          entropy_coeff: 0.009999999999999998
          kl: 0.006629583312901054
          policy_loss: 0.01240334196223153
          total_loss: 0.8237299733691745
          vf_explained_var: 0.37461280822753906
          vf_loss: 0.8153621170255873
    num_agent_steps_sampled: 1205000
    num_agent_steps_trained: 1205000
    num_steps_sampled: 1205000
    num_steps_trained: 12050

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1205,32661.2,1205000,-23.514,-20.2,-39,235.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1206000
  custom_metrics: {}
  date: 2021-10-29_06-11-20
  done: false
  episode_len_mean: 234.94
  episode_media: {}
  episode_reward_max: -20.200000000000017
  episode_reward_mean: -23.494000000000064
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 4108
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04371007238433861
          cur_lr: 5.000000000000001e-05
          entropy: 0.18270572241809632
          entropy_coeff: 0.009999999999999998
          kl: 0.0060005350571828864
          policy_loss: -0.009005333731571833
          total_loss: 1.067978310585022
          vf_explained_var: 0.24611835181713104
          vf_loss: 1.0785484347078536
    num_agent_steps_sampled: 1206000
    num_agent_steps_trained: 1206000
    num_steps_sampled: 1206000
    num_steps_trained: 120

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1206,32693.8,1206000,-23.494,-20.2,-39,234.94




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1207000
  custom_metrics: {}
  date: 2021-10-29_06-12-05
  done: false
  episode_len_mean: 234.55
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -23.455000000000062
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 5
  episodes_total: 4113
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04371007238433861
          cur_lr: 5.000000000000001e-05
          entropy: 0.606888210773468
          entropy_coeff: 0.009999999999999998
          kl: 0.06379793661770476
          policy_loss: -0.004018933284613821
          total_loss: 1.2749713785118526
          vf_explained_var: 0.30446553230285645
          vf_loss: 1.2822706003983815
    num_agent_steps_sampled: 1207000
    num_agent_steps_trained: 1207000
    num_steps_sampled: 1207000
    num_steps_trained: 120700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1207,32739.3,1207000,-23.455,-20.1,-39,234.55


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1208000
  custom_metrics: {}
  date: 2021-10-29_06-12-37
  done: false
  episode_len_mean: 234.79
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -23.47900000000006
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 4117
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 0.31897127164734734
          entropy_coeff: 0.009999999999999998
          kl: 0.00872905585818289
          policy_loss: 0.028988453331920835
          total_loss: 1.1910674969355266
          vf_explained_var: 0.27341023087501526
          vf_loss: 1.164696431822247
    num_agent_steps_sampled: 1208000
    num_agent_steps_trained: 1208000
    num_steps_sampled: 1208000
    num_steps_trained: 1208000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1208,32770.6,1208000,-23.479,-20.1,-39,234.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1209000
  custom_metrics: {}
  date: 2021-10-29_06-13-07
  done: false
  episode_len_mean: 234.76
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -23.476000000000067
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 4121
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 0.3140080167187585
          entropy_coeff: 0.009999999999999998
          kl: 0.012789155608695582
          policy_loss: 0.03456781879067421
          total_loss: 1.1113228115770553
          vf_explained_var: 0.37487906217575073
          vf_loss: 1.0790565583440992
    num_agent_steps_sampled: 1209000
    num_agent_steps_trained: 1209000
    num_steps_sampled: 1209000
    num_steps_trained: 120900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1209,32801.4,1209000,-23.476,-20.1,-39,234.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1210000
  custom_metrics: {}
  date: 2021-10-29_06-13-38
  done: false
  episode_len_mean: 235.06
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -23.506000000000064
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 5
  episodes_total: 4126
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06556510857650792
          cur_lr: 5.000000000000001e-05
          entropy: 0.3593747157189581
          entropy_coeff: 0.009999999999999998
          kl: 0.023129322810285815
          policy_loss: -0.033865429295433895
          total_loss: 1.1908500525686476
          vf_explained_var: 0.4572117328643799
          vf_loss: 1.226792754067315
    num_agent_steps_sampled: 1210000
    num_agent_steps_trained: 1210000
    num_steps_sampled: 1210000
    num_steps_trained: 121000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1210,32832.3,1210000,-23.506,-20.1,-39,235.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1211000
  custom_metrics: {}
  date: 2021-10-29_06-14-10
  done: false
  episode_len_mean: 235.45
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -23.545000000000062
  episode_reward_min: -39.000000000000284
  episodes_this_iter: 4
  episodes_total: 4130
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09834766286476193
          cur_lr: 5.000000000000001e-05
          entropy: 0.23977824085288577
          entropy_coeff: 0.009999999999999998
          kl: 0.004335676594867976
          policy_loss: -0.05097097551657094
          total_loss: 0.784620714518759
          vf_explained_var: 0.5742130279541016
          vf_loss: 0.837563059065077
    num_agent_steps_sampled: 1211000
    num_agent_steps_trained: 1211000
    num_steps_sampled: 1211000
    num_steps_trained: 1211000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1211,32863.5,1211000,-23.545,-20.1,-39,235.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1212000
  custom_metrics: {}
  date: 2021-10-29_06-14-27
  done: false
  episode_len_mean: 240.82
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.082000000000072
  episode_reward_min: -51.50000000000046
  episodes_this_iter: 2
  episodes_total: 4132
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.049173831432380966
          cur_lr: 5.000000000000001e-05
          entropy: 1.6129933370484246
          entropy_coeff: 0.009999999999999998
          kl: 0.051473148080409856
          policy_loss: 0.09994278616375393
          total_loss: 0.6726018574502733
          vf_explained_var: 0.2931148409843445
          vf_loss: 0.5862578690465954
    num_agent_steps_sampled: 1212000
    num_agent_steps_trained: 1212000
    num_steps_sampled: 1212000
    num_steps_trained: 1212000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1212,32880.6,1212000,-24.082,-20.1,-51.5,240.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1213000
  custom_metrics: {}
  date: 2021-10-29_06-14-45
  done: false
  episode_len_mean: 244.22
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.42200000000008
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 2
  episodes_total: 4134
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0737607471485714
          cur_lr: 5.000000000000001e-05
          entropy: 1.3394793026977114
          entropy_coeff: 0.009999999999999998
          kl: 0.021516285045928403
          policy_loss: -0.08266872084803051
          total_loss: 0.632497286134296
          vf_explained_var: -0.0918196439743042
          vf_loss: 0.7269737490349346
    num_agent_steps_sampled: 1213000
    num_agent_steps_trained: 1213000
    num_steps_sampled: 1213000
    num_steps_trained: 1213000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1213,32898.9,1213000,-24.422,-20.1,-55.4,244.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1214000
  custom_metrics: {}
  date: 2021-10-29_06-15-07
  done: false
  episode_len_mean: 249.52
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.952000000000083
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4137
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1106411207228571
          cur_lr: 5.000000000000001e-05
          entropy: 1.3228415648142497
          entropy_coeff: 0.009999999999999998
          kl: 0.02058045578465079
          policy_loss: 0.07912785294983123
          total_loss: 0.7637192443013191
          vf_explained_var: -0.11466565728187561
          vf_loss: 0.6955427608970138
    num_agent_steps_sampled: 1214000
    num_agent_steps_trained: 1214000
    num_steps_sampled: 1214000
    num_steps_trained: 1214000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1214,32920.4,1214000,-24.952,-20.1,-55.4,249.52




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1215000
  custom_metrics: {}
  date: 2021-10-29_06-15-46
  done: false
  episode_len_mean: 249.61
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -24.961000000000084
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4140
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16596168108428566
          cur_lr: 5.000000000000001e-05
          entropy: 1.0622988992267184
          entropy_coeff: 0.009999999999999998
          kl: 0.07055045851970682
          policy_loss: 0.032821108649174376
          total_loss: 0.45060758027765485
          vf_explained_var: 0.2213742733001709
          vf_loss: 0.41670078510004616
    num_agent_steps_sampled: 1215000
    num_agent_steps_trained: 1215000
    num_steps_sampled: 1215000
    num_steps_trained: 121500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1215,32960.2,1215000,-24.961,-20.1,-55.4,249.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1216000
  custom_metrics: {}
  date: 2021-10-29_06-16-09
  done: false
  episode_len_mean: 252.41
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -25.241000000000096
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 2
  episodes_total: 4142
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24894252162642855
          cur_lr: 5.000000000000001e-05
          entropy: 1.086511054303911
          entropy_coeff: 0.009999999999999998
          kl: 0.021585348974294285
          policy_loss: -0.059490465455585055
          total_loss: 0.8500940564605924
          vf_explained_var: -0.13751031458377838
          vf_loss: 0.9150761307734582
    num_agent_steps_sampled: 1216000
    num_agent_steps_trained: 1216000
    num_steps_sampled: 1216000
    num_steps_trained: 12160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1216,32982.9,1216000,-25.241,-20.1,-55.4,252.41


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1217000
  custom_metrics: {}
  date: 2021-10-29_06-16-30
  done: false
  episode_len_mean: 256.48
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -25.648000000000103
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4145
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.37341378243964285
          cur_lr: 5.000000000000001e-05
          entropy: 1.175614125861062
          entropy_coeff: 0.009999999999999998
          kl: 0.014149462270980075
          policy_loss: -0.05496856744090716
          total_loss: 1.0577000512017145
          vf_explained_var: 0.2791609764099121
          vf_loss: 1.1191411707136365
    num_agent_steps_sampled: 1217000
    num_agent_steps_trained: 1217000
    num_steps_sampled: 1217000
    num_steps_trained: 1217000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1217,33003.9,1217000,-25.648,-20.1,-55.4,256.48


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1218000
  custom_metrics: {}
  date: 2021-10-29_06-16-52
  done: false
  episode_len_mean: 260.99
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -26.099000000000096
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4148
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.37341378243964285
          cur_lr: 5.000000000000001e-05
          entropy: 1.1181530243820614
          entropy_coeff: 0.009999999999999998
          kl: 0.008702424957380432
          policy_loss: 0.0360367909901672
          total_loss: 1.0048140860266155
          vf_explained_var: 0.037219565361738205
          vf_loss: 0.9767092158603999
    num_agent_steps_sampled: 1218000
    num_agent_steps_trained: 1218000
    num_steps_sampled: 1218000
    num_steps_trained: 1218000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1218,33026,1218000,-26.099,-20.1,-55.4,260.99


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1219000
  custom_metrics: {}
  date: 2021-10-29_06-17-14
  done: false
  episode_len_mean: 263.67
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -26.367000000000107
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4151
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.37341378243964285
          cur_lr: 5.000000000000001e-05
          entropy: 1.1108676817682055
          entropy_coeff: 0.009999999999999998
          kl: 0.010925174311171067
          policy_loss: 0.004928170889616013
          total_loss: 0.9318318535884221
          vf_explained_var: -0.10033883899450302
          vf_loss: 0.933932750340965
    num_agent_steps_sampled: 1219000
    num_agent_steps_trained: 1219000
    num_steps_sampled: 1219000
    num_steps_trained: 121900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1219,33047.3,1219000,-26.367,-20.1,-55.4,263.67


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1220000
  custom_metrics: {}
  date: 2021-10-29_06-17-34
  done: false
  episode_len_mean: 268.4
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -26.84000000000011
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4154
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.37341378243964285
          cur_lr: 5.000000000000001e-05
          entropy: 1.2611554284890494
          entropy_coeff: 0.009999999999999998
          kl: 0.01936385791263425
          policy_loss: 0.05774702495998806
          total_loss: 1.114010206858317
          vf_explained_var: 0.41837260127067566
          vf_loss: 1.0616439998149871
    num_agent_steps_sampled: 1220000
    num_agent_steps_trained: 1220000
    num_steps_sampled: 1220000
    num_steps_trained: 1220000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1220,33068.1,1220000,-26.84,-20.1,-55.4,268.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1221000
  custom_metrics: {}
  date: 2021-10-29_06-17-59
  done: false
  episode_len_mean: 270.72
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -27.072000000000113
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4157
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.37341378243964285
          cur_lr: 5.000000000000001e-05
          entropy: 0.8516264206833309
          entropy_coeff: 0.009999999999999998
          kl: 0.021547908208831266
          policy_loss: 0.08710104003548622
          total_loss: 0.7980640163024266
          vf_explained_var: 0.3270193934440613
          vf_loss: 0.7114329535410636
    num_agent_steps_sampled: 1221000
    num_agent_steps_trained: 1221000
    num_steps_sampled: 1221000
    num_steps_trained: 1221000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1221,33092.2,1221000,-27.072,-20.1,-55.4,270.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1222000
  custom_metrics: {}
  date: 2021-10-29_06-18-20
  done: false
  episode_len_mean: 273.23
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -27.323000000000114
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 2
  episodes_total: 4159
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5601206736594643
          cur_lr: 5.000000000000001e-05
          entropy: 1.272198341290156
          entropy_coeff: 0.009999999999999998
          kl: 0.014225573083491207
          policy_loss: -0.05858703781333235
          total_loss: 0.8817679574092229
          vf_explained_var: 0.016636619344353676
          vf_loss: 0.9451089556018512
    num_agent_steps_sampled: 1222000
    num_agent_steps_trained: 1222000
    num_steps_sampled: 1222000
    num_steps_trained: 1222000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1222,33113.4,1222000,-27.323,-20.1,-55.4,273.23


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1223000
  custom_metrics: {}
  date: 2021-10-29_06-18-45
  done: false
  episode_len_mean: 274.13
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -27.41300000000012
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4162
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5601206736594643
          cur_lr: 5.000000000000001e-05
          entropy: 1.0508725093470679
          entropy_coeff: 0.009999999999999998
          kl: 0.02451905258390015
          policy_loss: -0.05284522217180994
          total_loss: 1.2398892203966776
          vf_explained_var: 0.27866920828819275
          vf_loss: 1.2895095573531257
    num_agent_steps_sampled: 1223000
    num_agent_steps_trained: 1223000
    num_steps_sampled: 1223000
    num_steps_trained: 1223000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1223,33138.6,1223000,-27.413,-20.1,-55.4,274.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1224000
  custom_metrics: {}
  date: 2021-10-29_06-19-10
  done: false
  episode_len_mean: 277.45
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -27.745000000000122
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 4
  episodes_total: 4166
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8401810104891966
          cur_lr: 5.000000000000001e-05
          entropy: 0.915217583709293
          entropy_coeff: 0.009999999999999998
          kl: 0.02763742829556496
          policy_loss: 0.06633584565586514
          total_loss: 1.1258252869049707
          vf_explained_var: 0.5350567102432251
          vf_loss: 1.045421171064178
    num_agent_steps_sampled: 1224000
    num_agent_steps_trained: 1224000
    num_steps_sampled: 1224000
    num_steps_trained: 1224000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1224,33163.4,1224000,-27.745,-20.1,-55.4,277.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1225000
  custom_metrics: {}
  date: 2021-10-29_06-19-36
  done: false
  episode_len_mean: 279.95
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -27.995000000000122
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4169
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2602715157337947
          cur_lr: 5.000000000000001e-05
          entropy: 1.2733864976300133
          entropy_coeff: 0.009999999999999998
          kl: 0.005532770491243216
          policy_loss: -0.053885690205627015
          total_loss: 1.1237396640910042
          vf_explained_var: 0.12815013527870178
          vf_loss: 1.1833864296476047
    num_agent_steps_sampled: 1225000
    num_agent_steps_trained: 1225000
    num_steps_sampled: 1225000
    num_steps_trained: 122500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1225,33189.5,1225000,-27.995,-20.1,-55.4,279.95




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1226000
  custom_metrics: {}
  date: 2021-10-29_06-20-20
  done: false
  episode_len_mean: 281.35
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -28.135000000000126
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 4
  episodes_total: 4173
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2602715157337947
          cur_lr: 5.000000000000001e-05
          entropy: 1.0674485348992877
          entropy_coeff: 0.009999999999999998
          kl: 0.004993889876887576
          policy_loss: -0.006111866732438406
          total_loss: 1.2218406541479958
          vf_explained_var: 0.4091515839099884
          vf_loss: 1.2323333675662675
    num_agent_steps_sampled: 1226000
    num_agent_steps_trained: 1226000
    num_steps_sampled: 1226000
    num_steps_trained: 1226000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1226,33233.7,1226000,-28.135,-20.1,-55.4,281.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1227000
  custom_metrics: {}
  date: 2021-10-29_06-20-48
  done: false
  episode_len_mean: 283.1
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -28.31000000000013
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 4
  episodes_total: 4177
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6301357578668974
          cur_lr: 5.000000000000001e-05
          entropy: 0.482080630130238
          entropy_coeff: 0.009999999999999998
          kl: 0.004841686391617945
          policy_loss: 0.04039761970440547
          total_loss: 1.122063496377733
          vf_explained_var: 0.575264036655426
          vf_loss: 1.0834357774919934
    num_agent_steps_sampled: 1227000
    num_agent_steps_trained: 1227000
    num_steps_sampled: 1227000
    num_steps_trained: 1227000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1227,33261.8,1227000,-28.31,-20.1,-55.4,283.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1228000
  custom_metrics: {}
  date: 2021-10-29_06-21-11
  done: false
  episode_len_mean: 286.69
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -28.669000000000143
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4180
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3150678789334487
          cur_lr: 5.000000000000001e-05
          entropy: 1.5665540430280898
          entropy_coeff: 0.009999999999999998
          kl: 0.017938640043239217
          policy_loss: 0.013104310797320471
          total_loss: 1.162521822916137
          vf_explained_var: -0.19171901047229767
          vf_loss: 1.1594311771293482
    num_agent_steps_sampled: 1228000
    num_agent_steps_trained: 1228000
    num_steps_sampled: 1228000
    num_steps_trained: 1228000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1228,33284.5,1228000,-28.669,-20.1,-55.4,286.69


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1229000
  custom_metrics: {}
  date: 2021-10-29_06-21-32
  done: false
  episode_len_mean: 289.46
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -28.94600000000014
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 2
  episodes_total: 4182
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3150678789334487
          cur_lr: 5.000000000000001e-05
          entropy: 1.4250974204805162
          entropy_coeff: 0.009999999999999998
          kl: 0.02292230624409686
          policy_loss: -0.07756500757402844
          total_loss: 1.0771479616562525
          vf_explained_var: 0.10769940167665482
          vf_loss: 1.1617418431573443
    num_agent_steps_sampled: 1229000
    num_agent_steps_trained: 1229000
    num_steps_sampled: 1229000
    num_steps_trained: 1229000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1229,33305.9,1229000,-28.946,-20.1,-55.4,289.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1230000
  custom_metrics: {}
  date: 2021-10-29_06-21-56
  done: false
  episode_len_mean: 292.17
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.217000000000144
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 4
  episodes_total: 4186
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4726018184001731
          cur_lr: 5.000000000000001e-05
          entropy: 1.4854899750815498
          entropy_coeff: 0.009999999999999998
          kl: 0.022302053461365014
          policy_loss: -0.11127876556581921
          total_loss: 1.1603289880686336
          vf_explained_var: 0.3244568407535553
          vf_loss: 1.2759226520856222
    num_agent_steps_sampled: 1230000
    num_agent_steps_trained: 1230000
    num_steps_sampled: 1230000
    num_steps_trained: 1230000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1230,33329.2,1230000,-29.217,-20.1,-55.4,292.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1231000
  custom_metrics: {}
  date: 2021-10-29_06-22-26
  done: false
  episode_len_mean: 292.31
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.23100000000014
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4189
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7089027276002594
          cur_lr: 5.000000000000001e-05
          entropy: 0.46216800014177956
          entropy_coeff: 0.009999999999999998
          kl: 0.01162240707269733
          policy_loss: 0.08504498758249812
          total_loss: 0.37101956092649035
          vf_explained_var: 0.8484077453613281
          vf_loss: 0.2823570938573943
    num_agent_steps_sampled: 1231000
    num_agent_steps_trained: 1231000
    num_steps_sampled: 1231000
    num_steps_trained: 1231000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1231,33359.2,1231000,-29.231,-20.1,-55.4,292.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1232000
  custom_metrics: {}
  date: 2021-10-29_06-22-51
  done: false
  episode_len_mean: 295.03
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.503000000000146
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 4
  episodes_total: 4193
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7089027276002594
          cur_lr: 5.000000000000001e-05
          entropy: 0.9881017625331878
          entropy_coeff: 0.009999999999999998
          kl: 0.013098562323125248
          policy_loss: 0.05909046249257194
          total_loss: 1.322677962647544
          vf_explained_var: 0.43256455659866333
          vf_loss: 1.264182921912935
    num_agent_steps_sampled: 1232000
    num_agent_steps_trained: 1232000
    num_steps_sampled: 1232000
    num_steps_trained: 1232000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1232,33384.3,1232000,-29.503,-20.1,-55.4,295.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1233000
  custom_metrics: {}
  date: 2021-10-29_06-23-19
  done: false
  episode_len_mean: 296.16
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.61600000000015
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4196
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7089027276002594
          cur_lr: 5.000000000000001e-05
          entropy: 0.7499065193865034
          entropy_coeff: 0.009999999999999998
          kl: 0.011538921628361563
          policy_loss: -0.04482719674706459
          total_loss: 0.7688892526759041
          vf_explained_var: 0.6080446243286133
          vf_loss: 0.8130355397860209
    num_agent_steps_sampled: 1233000
    num_agent_steps_trained: 1233000
    num_steps_sampled: 1233000
    num_steps_trained: 1233000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1233,33412,1233000,-29.616,-20.1,-55.4,296.16




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1234000
  custom_metrics: {}
  date: 2021-10-29_06-24-06
  done: false
  episode_len_mean: 298.13
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.81300000000015
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 4
  episodes_total: 4200
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7089027276002594
          cur_lr: 5.000000000000001e-05
          entropy: 0.8209504230154885
          entropy_coeff: 0.009999999999999998
          kl: 0.012703674793110369
          policy_loss: 0.010695348928372065
          total_loss: 1.3083047946294148
          vf_explained_var: 0.2551041841506958
          vf_loss: 1.296813295284907
    num_agent_steps_sampled: 1234000
    num_agent_steps_trained: 1234000
    num_steps_sampled: 1234000
    num_steps_trained: 1234000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1234,33459,1234000,-29.813,-20.1,-55.4,298.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1235000
  custom_metrics: {}
  date: 2021-10-29_06-24-30
  done: false
  episode_len_mean: 300.81
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.081000000000152
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 4
  episodes_total: 4204
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7089027276002594
          cur_lr: 5.000000000000001e-05
          entropy: 1.2624481836954753
          entropy_coeff: 0.009999999999999998
          kl: 0.016686487442554764
          policy_loss: -0.01347228917810652
          total_loss: 1.540375304222107
          vf_explained_var: 0.13946102559566498
          vf_loss: 1.5546429806285433
    num_agent_steps_sampled: 1235000
    num_agent_steps_trained: 1235000
    num_steps_sampled: 1235000
    num_steps_trained: 1235000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1235,33483.6,1235000,-30.081,-20.1,-55.4,300.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1236000
  custom_metrics: {}
  date: 2021-10-29_06-24-57
  done: false
  episode_len_mean: 302.06
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.206000000000163
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4207
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7089027276002594
          cur_lr: 5.000000000000001e-05
          entropy: 0.8315776685873667
          entropy_coeff: 0.009999999999999998
          kl: 0.00852567468123773
          policy_loss: -0.03010679086049398
          total_loss: 0.8421987142827776
          vf_explained_var: 0.4694339632987976
          vf_loss: 0.8745774100224177
    num_agent_steps_sampled: 1236000
    num_agent_steps_trained: 1236000
    num_steps_sampled: 1236000
    num_steps_trained: 1236000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1236,33510.8,1236000,-30.206,-20.1,-55.4,302.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1237000
  custom_metrics: {}
  date: 2021-10-29_06-25-23
  done: false
  episode_len_mean: 305.18
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.51800000000017
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 4
  episodes_total: 4211
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7089027276002594
          cur_lr: 5.000000000000001e-05
          entropy: 1.326836731698778
          entropy_coeff: 0.009999999999999998
          kl: 0.0053001058182894035
          policy_loss: -0.06627769130799506
          total_loss: 1.4035670651329888
          vf_explained_var: 0.1028791218996048
          vf_loss: 1.4793558571073744
    num_agent_steps_sampled: 1237000
    num_agent_steps_trained: 1237000
    num_steps_sampled: 1237000
    num_steps_trained: 1237000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1237,33536.3,1237000,-30.518,-20.3,-55.4,305.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1238000
  custom_metrics: {}
  date: 2021-10-29_06-25-48
  done: false
  episode_len_mean: 307.19
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.719000000000168
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4214
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7089027276002594
          cur_lr: 5.000000000000001e-05
          entropy: 1.0942428059048124
          entropy_coeff: 0.009999999999999998
          kl: 0.008673589712007553
          policy_loss: 0.08338306645552317
          total_loss: 0.9404903776115842
          vf_explained_var: -0.006017054431140423
          vf_loss: 0.8619009952578279
    num_agent_steps_sampled: 1238000
    num_agent_steps_trained: 1238000
    num_steps_sampled: 1238000
    num_steps_trained: 1238000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1238,33561.2,1238000,-30.719,-20.3,-55.4,307.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1239000
  custom_metrics: {}
  date: 2021-10-29_06-26-18
  done: false
  episode_len_mean: 307.48
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.74800000000017
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 4
  episodes_total: 4218
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7089027276002594
          cur_lr: 5.000000000000001e-05
          entropy: 0.4919088472922643
          entropy_coeff: 0.009999999999999998
          kl: 0.008319617439652028
          policy_loss: 0.0517131669446826
          total_loss: 0.6570098916689555
          vf_explained_var: 0.5866870880126953
          vf_loss: 0.6043180104759004
    num_agent_steps_sampled: 1239000
    num_agent_steps_trained: 1239000
    num_steps_sampled: 1239000
    num_steps_trained: 1239000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1239,33591.1,1239000,-30.748,-20.3,-55.4,307.48


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1240000
  custom_metrics: {}
  date: 2021-10-29_06-26-44
  done: false
  episode_len_mean: 308.91
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.89100000000017
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 4
  episodes_total: 4222
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7089027276002594
          cur_lr: 5.000000000000001e-05
          entropy: 0.8704044666555193
          entropy_coeff: 0.009999999999999998
          kl: 0.006026866648313797
          policy_loss: -0.015845128811068004
          total_loss: 1.0214668770631155
          vf_explained_var: 0.40999239683151245
          vf_loss: 1.0417435814936955
    num_agent_steps_sampled: 1240000
    num_agent_steps_trained: 1240000
    num_steps_sampled: 1240000
    num_steps_trained: 1240000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1240,33616.8,1240000,-30.891,-20.3,-55.4,308.91


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1241000
  custom_metrics: {}
  date: 2021-10-29_06-27-13
  done: false
  episode_len_mean: 309.65
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.965000000000167
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 4
  episodes_total: 4226
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7089027276002594
          cur_lr: 5.000000000000001e-05
          entropy: 0.5271656529770957
          entropy_coeff: 0.009999999999999998
          kl: 0.0035336654149972445
          policy_loss: 0.04327785943945249
          total_loss: 0.5227073866460058
          vf_explained_var: 0.7972787022590637
          vf_loss: 0.48219615320364634
    num_agent_steps_sampled: 1241000
    num_agent_steps_trained: 1241000
    num_steps_sampled: 1241000
    num_steps_trained: 1241000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1241,33646.7,1241000,-30.965,-20.3,-55.4,309.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1242000
  custom_metrics: {}
  date: 2021-10-29_06-27-41
  done: false
  episode_len_mean: 310.75
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -31.075000000000177
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 3
  episodes_total: 4229
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3544513638001297
          cur_lr: 5.000000000000001e-05
          entropy: 0.8221335659424464
          entropy_coeff: 0.009999999999999998
          kl: 0.009673366843757789
          policy_loss: -0.052131622698571946
          total_loss: 0.9220628798007965
          vf_explained_var: 0.5234425663948059
          vf_loss: 0.9789871020449532
    num_agent_steps_sampled: 1242000
    num_agent_steps_trained: 1242000
    num_steps_sampled: 1242000
    num_steps_trained: 1242000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1242,33674.1,1242000,-31.075,-20.3,-55.4,310.75




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1243000
  custom_metrics: {}
  date: 2021-10-29_06-28-30
  done: false
  episode_len_mean: 305.89
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -30.589000000000166
  episode_reward_min: -55.40000000000052
  episodes_this_iter: 4
  episodes_total: 4233
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3544513638001297
          cur_lr: 5.000000000000001e-05
          entropy: 0.5053467099865278
          entropy_coeff: 0.009999999999999998
          kl: 0.014136272748401523
          policy_loss: -0.1323895930416054
          total_loss: 1.136884100569619
          vf_explained_var: 0.5871464610099792
          vf_loss: 1.2693165434731377
    num_agent_steps_sampled: 1243000
    num_agent_steps_trained: 1243000
    num_steps_sampled: 1243000
    num_steps_trained: 1243000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1243,33722.8,1243000,-30.589,-20.3,-55.4,305.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1244000
  custom_metrics: {}
  date: 2021-10-29_06-28-56
  done: false
  episode_len_mean: 299.39
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -29.939000000000156
  episode_reward_min: -52.80000000000048
  episodes_this_iter: 4
  episodes_total: 4237
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3544513638001297
          cur_lr: 5.000000000000001e-05
          entropy: 0.8988246560096741
          entropy_coeff: 0.009999999999999998
          kl: 0.01328570691107329
          policy_loss: 0.04741052724421024
          total_loss: 0.8247679786549674
          vf_explained_var: 0.524761974811554
          vf_loss: 0.7816365549961726
    num_agent_steps_sampled: 1244000
    num_agent_steps_trained: 1244000
    num_steps_sampled: 1244000
    num_steps_trained: 1244000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1244,33748.8,1244000,-29.939,-20.3,-52.8,299.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1245000
  custom_metrics: {}
  date: 2021-10-29_06-29-24
  done: false
  episode_len_mean: 295.9
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -29.59000000000015
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 4
  episodes_total: 4241
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3544513638001297
          cur_lr: 5.000000000000001e-05
          entropy: 0.7712722036573622
          entropy_coeff: 0.009999999999999998
          kl: 0.0064875340656929915
          policy_loss: -0.010793411855896313
          total_loss: 0.8656346129046546
          vf_explained_var: 0.5060369968414307
          vf_loss: 0.8818412211206225
    num_agent_steps_sampled: 1245000
    num_agent_steps_trained: 1245000
    num_steps_sampled: 1245000
    num_steps_trained: 1245000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1245,33776.9,1245000,-29.59,-20.3,-47.5,295.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1246000
  custom_metrics: {}
  date: 2021-10-29_06-29-50
  done: false
  episode_len_mean: 293.57
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -29.357000000000152
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 3
  episodes_total: 4244
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3544513638001297
          cur_lr: 5.000000000000001e-05
          entropy: 0.9678417219056024
          entropy_coeff: 0.009999999999999998
          kl: 0.014233676057408613
          policy_loss: -0.02874505817890167
          total_loss: 0.8521158006456163
          vf_explained_var: 0.2878335416316986
          vf_loss: 0.8854941363135974
    num_agent_steps_sampled: 1246000
    num_agent_steps_trained: 1246000
    num_steps_sampled: 1246000
    num_steps_trained: 1246000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1246,33803.3,1246000,-29.357,-20.3,-47.5,293.57


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1247000
  custom_metrics: {}
  date: 2021-10-29_06-30-17
  done: false
  episode_len_mean: 290.6
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -29.060000000000144
  episode_reward_min: -47.500000000000405
  episodes_this_iter: 4
  episodes_total: 4248
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3544513638001297
          cur_lr: 5.000000000000001e-05
          entropy: 0.9187362405988905
          entropy_coeff: 0.009999999999999998
          kl: 0.02184007344751472
          policy_loss: -0.018376265383428996
          total_loss: 0.9790587176879247
          vf_explained_var: 0.39950698614120483
          vf_loss: 0.9988811171717114
    num_agent_steps_sampled: 1247000
    num_agent_steps_trained: 1247000
    num_steps_sampled: 1247000
    num_steps_trained: 1247000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1247,33830,1247000,-29.06,-20.3,-47.5,290.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1248000
  custom_metrics: {}
  date: 2021-10-29_06-30-44
  done: false
  episode_len_mean: 289.57
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.957000000000143
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 3
  episodes_total: 4251
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5316770457001948
          cur_lr: 5.000000000000001e-05
          entropy: 0.777813098165724
          entropy_coeff: 0.009999999999999998
          kl: 0.005204784128214928
          policy_loss: -0.06572594617803891
          total_loss: 1.03963130513827
          vf_explained_var: 0.22904004156589508
          vf_loss: 1.1103681120607587
    num_agent_steps_sampled: 1248000
    num_agent_steps_trained: 1248000
    num_steps_sampled: 1248000
    num_steps_trained: 1248000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1248,33857.4,1248000,-28.957,-20.3,-46.6,289.57


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1249000
  custom_metrics: {}
  date: 2021-10-29_06-31-10
  done: false
  episode_len_mean: 286.37
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.63700000000014
  episode_reward_min: -45.80000000000038
  episodes_this_iter: 4
  episodes_total: 4255
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5316770457001948
          cur_lr: 5.000000000000001e-05
          entropy: 0.8953817248344421
          entropy_coeff: 0.009999999999999998
          kl: 0.006264540857282776
          policy_loss: -0.0010039799743228489
          total_loss: 1.1480050053861406
          vf_explained_var: 0.1810671091079712
          vf_loss: 1.1546320796012879
    num_agent_steps_sampled: 1249000
    num_agent_steps_trained: 1249000
    num_steps_sampled: 1249000
    num_steps_trained: 1249000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1249,33883.4,1249000,-28.637,-20.3,-45.8,286.37


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1250000
  custom_metrics: {}
  date: 2021-10-29_06-31-36
  done: false
  episode_len_mean: 283.37
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.337000000000135
  episode_reward_min: -43.00000000000034
  episodes_this_iter: 4
  episodes_total: 4259
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5316770457001948
          cur_lr: 5.000000000000001e-05
          entropy: 0.9160361309846242
          entropy_coeff: 0.009999999999999998
          kl: 0.008863822051059757
          policy_loss: 0.028397652010122936
          total_loss: 1.1363457944658069
          vf_explained_var: 0.2014094889163971
          vf_loss: 1.1123958044581943
    num_agent_steps_sampled: 1250000
    num_agent_steps_trained: 1250000
    num_steps_sampled: 1250000
    num_steps_trained: 1250000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1250,33908.9,1250000,-28.337,-20.3,-43,283.37




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1251000
  custom_metrics: {}
  date: 2021-10-29_06-32-21
  done: false
  episode_len_mean: 281.63
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.16300000000014
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 3
  episodes_total: 4262
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5316770457001948
          cur_lr: 5.000000000000001e-05
          entropy: 0.856903945075141
          entropy_coeff: 0.009999999999999998
          kl: 0.008311338406897918
          policy_loss: 0.04704605663816134
          total_loss: 0.6079107122288809
          vf_explained_var: 0.3117985725402832
          vf_loss: 0.5650147576298978
    num_agent_steps_sampled: 1251000
    num_agent_steps_trained: 1251000
    num_steps_sampled: 1251000
    num_steps_trained: 1251000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1251,33953.8,1251000,-28.163,-20.3,-39.4,281.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1252000
  custom_metrics: {}
  date: 2021-10-29_06-32-47
  done: false
  episode_len_mean: 282.5
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.250000000000135
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 3
  episodes_total: 4265
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5316770457001948
          cur_lr: 5.000000000000001e-05
          entropy: 0.9798648297786713
          entropy_coeff: 0.009999999999999998
          kl: 0.021905229846504427
          policy_loss: -0.10775945832331975
          total_loss: 1.2443842079904344
          vf_explained_var: 0.2054300457239151
          vf_loss: 1.3502958046065436
    num_agent_steps_sampled: 1252000
    num_agent_steps_trained: 1252000
    num_steps_sampled: 1252000
    num_steps_trained: 1252000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1252,33980.2,1252000,-28.25,-20.3,-39.4,282.5


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1253000
  custom_metrics: {}
  date: 2021-10-29_06-33-12
  done: false
  episode_len_mean: 281.36
  episode_media: {}
  episode_reward_max: -20.30000000000002
  episode_reward_mean: -28.136000000000127
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 4
  episodes_total: 4269
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 1.1013216025299497
          entropy_coeff: 0.009999999999999998
          kl: 0.009957335205178058
          policy_loss: 0.013396077023612129
          total_loss: 1.0414142505990134
          vf_explained_var: 0.42805683612823486
          vf_loss: 1.0310902673337194
    num_agent_steps_sampled: 1253000
    num_agent_steps_trained: 1253000
    num_steps_sampled: 1253000
    num_steps_trained: 1253000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1253,34004.6,1253000,-28.136,-20.3,-39.4,281.36


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1254000
  custom_metrics: {}
  date: 2021-10-29_06-33-40
  done: false
  episode_len_mean: 281.09
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.109000000000133
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 3
  episodes_total: 4272
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 0.8325011319584317
          entropy_coeff: 0.009999999999999998
          kl: 0.009073159439380637
          policy_loss: -0.06845564188228713
          total_loss: 0.6622394843233956
          vf_explained_var: 0.613537609577179
          vf_loss: 0.7317841384145949
    num_agent_steps_sampled: 1254000
    num_agent_steps_trained: 1254000
    num_steps_sampled: 1254000
    num_steps_trained: 1254000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1254,34032.9,1254000,-28.109,-21.8,-39.4,281.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1255000
  custom_metrics: {}
  date: 2021-10-29_06-34-05
  done: false
  episode_len_mean: 282.97
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.297000000000136
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 4
  episodes_total: 4276
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 0.9984147244029575
          entropy_coeff: 0.009999999999999998
          kl: 0.017659612176685022
          policy_loss: 0.06591721971829732
          total_loss: 0.9745422257317438
          vf_explained_var: 0.6432090997695923
          vf_loss: 0.9045253439082039
    num_agent_steps_sampled: 1255000
    num_agent_steps_trained: 1255000
    num_steps_sampled: 1255000
    num_steps_trained: 1255000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1255,34058,1255000,-28.297,-21.8,-39.4,282.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1256000
  custom_metrics: {}
  date: 2021-10-29_06-34-30
  done: false
  episode_len_mean: 281.51
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.151000000000128
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 3
  episodes_total: 4279
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 0.9726354618867238
          entropy_coeff: 0.009999999999999998
          kl: 0.008461088498787541
          policy_loss: 0.06236467394563887
          total_loss: 0.9026221205790838
          vf_explained_var: 0.21773213148117065
          vf_loss: 0.8432359528210428
    num_agent_steps_sampled: 1256000
    num_agent_steps_trained: 1256000
    num_steps_sampled: 1256000
    num_steps_trained: 1256000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1256,34082.4,1256000,-28.151,-21.8,-39.4,281.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1257000
  custom_metrics: {}
  date: 2021-10-29_06-34-56
  done: false
  episode_len_mean: 279.84
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.98400000000013
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 4282
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 1.0134714927938249
          entropy_coeff: 0.009999999999999998
          kl: 0.010764559082385834
          policy_loss: -0.04900950226518843
          total_loss: 0.8502611600690417
          vf_explained_var: 0.5314321517944336
          vf_loss: 0.9008204744921791
    num_agent_steps_sampled: 1257000
    num_agent_steps_trained: 1257000
    num_steps_sampled: 1257000
    num_steps_trained: 1257000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1257,34108.4,1257000,-27.984,-21.8,-38.6,279.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1258000
  custom_metrics: {}
  date: 2021-10-29_06-35-22
  done: false
  episode_len_mean: 279.51
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -27.951000000000125
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 4286
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 0.9873629762066736
          entropy_coeff: 0.009999999999999998
          kl: 0.00862641353178317
          policy_loss: -0.06652862686249945
          total_loss: 1.2025375498665705
          vf_explained_var: 0.36614540219306946
          vf_loss: 1.2720601154698266
    num_agent_steps_sampled: 1258000
    num_agent_steps_trained: 1258000
    num_steps_sampled: 1258000
    num_steps_trained: 1258000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1258,34134.8,1258000,-27.951,-21.8,-38.6,279.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1259000
  custom_metrics: {}
  date: 2021-10-29_06-35-47
  done: false
  episode_len_mean: 281.89
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.189000000000128
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 4289
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 1.1447108341587915
          entropy_coeff: 0.009999999999999998
          kl: 0.006237694772352932
          policy_loss: -0.013698491040203306
          total_loss: 0.9107248571183947
          vf_explained_var: 0.3596382439136505
          vf_loss: 0.9308957955903477
    num_agent_steps_sampled: 1259000
    num_agent_steps_trained: 1259000
    num_steps_sampled: 1259000
    num_steps_trained: 1259000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1259,34159.5,1259000,-28.189,-21.8,-38.6,281.89




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1260000
  custom_metrics: {}
  date: 2021-10-29_06-36-31
  done: false
  episode_len_mean: 280.45
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.045000000000126
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 4293
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 0.795980593893263
          entropy_coeff: 0.009999999999999998
          kl: 0.009257875542198077
          policy_loss: 0.03263820658127467
          total_loss: 0.849341426955329
          vf_explained_var: 0.6210734844207764
          vf_loss: 0.8172797305716408
    num_agent_steps_sampled: 1260000
    num_agent_steps_trained: 1260000
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1260,34203.6,1260000,-28.045,-21.8,-38.6,280.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1261000
  custom_metrics: {}
  date: 2021-10-29_06-36-59
  done: false
  episode_len_mean: 280.71
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -28.07100000000013
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 3
  episodes_total: 4296
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 0.8965971377160814
          entropy_coeff: 0.009999999999999998
          kl: 0.007312196224573123
          policy_loss: -0.077557429836856
          total_loss: 1.050180306699541
          vf_explained_var: 0.22772739827632904
          vf_loss: 1.1308721197976006
    num_agent_steps_sampled: 1261000
    num_agent_steps_trained: 1261000
    num_steps_sampled: 1261000
    num_steps_trained: 1261000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1261,34232.2,1261000,-28.071,-21.8,-38.6,280.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1262000
  custom_metrics: {}
  date: 2021-10-29_06-37-28
  done: false
  episode_len_mean: 280.01
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -28.001000000000136
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 4300
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 0.8649828751881917
          entropy_coeff: 0.009999999999999998
          kl: 0.00952952153286838
          policy_loss: 0.004471974571545919
          total_loss: 1.0524129026465945
          vf_explained_var: 0.3543086051940918
          vf_loss: 1.048990797996521
    num_agent_steps_sampled: 1262000
    num_agent_steps_trained: 1262000
    num_steps_sampled: 1262000
    num_steps_trained: 1262000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1262,34260.4,1262000,-28.001,-22,-38.6,280.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1263000
  custom_metrics: {}
  date: 2021-10-29_06-37-54
  done: false
  episode_len_mean: 279.45
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -27.94500000000013
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 4304
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 0.9385584433873494
          entropy_coeff: 0.009999999999999998
          kl: 0.012225916428510095
          policy_loss: 0.022152342771490415
          total_loss: 1.3859619180361429
          vf_explained_var: 0.1557142585515976
          vf_loss: 1.3634448097811804
    num_agent_steps_sampled: 1263000
    num_agent_steps_trained: 1263000
    num_steps_sampled: 1263000
    num_steps_trained: 1263000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1263,34286.5,1263000,-27.945,-22,-38.6,279.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1264000
  custom_metrics: {}
  date: 2021-10-29_06-38-23
  done: false
  episode_len_mean: 278.98
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -27.898000000000128
  episode_reward_min: -38.60000000000028
  episodes_this_iter: 4
  episodes_total: 4308
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 0.8001910593774584
          entropy_coeff: 0.009999999999999998
          kl: 0.01659072487355707
          policy_loss: -0.00022439385453859966
          total_loss: 0.8364641043874953
          vf_explained_var: 0.5554454922676086
          vf_loss: 0.8314590394496918
    num_agent_steps_sampled: 1264000
    num_agent_steps_trained: 1264000
    num_steps_sampled: 1264000
    num_steps_trained: 126400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1264,34315.3,1264000,-27.898,-22,-38.6,278.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1265000
  custom_metrics: {}
  date: 2021-10-29_06-38-46
  done: false
  episode_len_mean: 279.9
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -27.990000000000133
  episode_reward_min: -39.20000000000029
  episodes_this_iter: 3
  episodes_total: 4311
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 0.9224433819452922
          entropy_coeff: 0.009999999999999998
          kl: 0.014703541105484528
          policy_loss: 0.03948123256365458
          total_loss: 0.9159041914674971
          vf_explained_var: 0.4879683554172516
          vf_loss: 0.8739210884604189
    num_agent_steps_sampled: 1265000
    num_agent_steps_trained: 1265000
    num_steps_sampled: 1265000
    num_steps_trained: 1265000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1265,34339.1,1265000,-27.99,-22,-39.2,279.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1266000
  custom_metrics: {}
  date: 2021-10-29_06-39-11
  done: false
  episode_len_mean: 280.01
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -28.001000000000136
  episode_reward_min: -39.20000000000029
  episodes_this_iter: 3
  episodes_total: 4314
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 0.9894564946492513
          entropy_coeff: 0.009999999999999998
          kl: 0.015169338653500277
          policy_loss: 0.06215184579292933
          total_loss: 0.7851351456509696
          vf_explained_var: 0.21461084485054016
          vf_loss: 0.7207800721956624
    num_agent_steps_sampled: 1266000
    num_agent_steps_trained: 1266000
    num_steps_sampled: 1266000
    num_steps_trained: 1266000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1266,34363.7,1266000,-28.001,-22,-39.2,280.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1267000
  custom_metrics: {}
  date: 2021-10-29_06-39-35
  done: false
  episode_len_mean: 282.64
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -28.264000000000134
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4317
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 1.0590221272574531
          entropy_coeff: 0.009999999999999998
          kl: 0.007198368253352743
          policy_loss: 0.05745675762494405
          total_loss: 1.0649562487999598
          vf_explained_var: -0.26243990659713745
          vf_loss: 1.0123489051643344
    num_agent_steps_sampled: 1267000
    num_agent_steps_trained: 1267000
    num_steps_sampled: 1267000
    num_steps_trained: 1267000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1267,34387.2,1267000,-28.264,-22,-41.6,282.64




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1268000
  custom_metrics: {}
  date: 2021-10-29_06-40-19
  done: false
  episode_len_mean: 282.64
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -28.264000000000134
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 4321
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 0.8514296412467957
          entropy_coeff: 0.009999999999999998
          kl: 0.009007920452404465
          policy_loss: -0.01968108539779981
          total_loss: 0.8785201172033946
          vf_explained_var: 0.5499282479286194
          vf_loss: 0.899531540605757
    num_agent_steps_sampled: 1268000
    num_agent_steps_trained: 1268000
    num_steps_sampled: 1268000
    num_steps_trained: 1268000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1268,34432,1268000,-28.264,-22,-41.6,282.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1269000
  custom_metrics: {}
  date: 2021-10-29_06-40-48
  done: false
  episode_len_mean: 283.34
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -28.334000000000138
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4324
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7975155685502922
          cur_lr: 5.000000000000001e-05
          entropy: 0.9898808770709567
          entropy_coeff: 0.009999999999999998
          kl: 0.003607906775807932
          policy_loss: 0.04453218827644984
          total_loss: 0.52173004647096
          vf_explained_var: 0.7997329831123352
          vf_loss: 0.48421931291619935
    num_agent_steps_sampled: 1269000
    num_agent_steps_trained: 1269000
    num_steps_sampled: 1269000
    num_steps_trained: 1269000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1269,34460.3,1269000,-28.334,-22,-41.6,283.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1270000
  custom_metrics: {}
  date: 2021-10-29_06-41-10
  done: false
  episode_len_mean: 284.61
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -28.46100000000014
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4327
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3987577842751461
          cur_lr: 5.000000000000001e-05
          entropy: 1.0103565937942929
          entropy_coeff: 0.009999999999999998
          kl: 0.013175321048777751
          policy_loss: -0.14512187697821194
          total_loss: 0.7093933002816306
          vf_explained_var: 0.6387007832527161
          vf_loss: 0.8593649923801422
    num_agent_steps_sampled: 1270000
    num_agent_steps_trained: 1270000
    num_steps_sampled: 1270000
    num_steps_trained: 1270000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1270,34482.8,1270000,-28.461,-22,-41.6,284.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1271000
  custom_metrics: {}
  date: 2021-10-29_06-41-35
  done: false
  episode_len_mean: 287.15
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -28.715000000000135
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 4331
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3987577842751461
          cur_lr: 5.000000000000001e-05
          entropy: 0.9928325990835826
          entropy_coeff: 0.009999999999999998
          kl: 0.012374466136993414
          policy_loss: -0.03986183835400475
          total_loss: 0.598270454009374
          vf_explained_var: 0.7671728134155273
          vf_loss: 0.6431261989805434
    num_agent_steps_sampled: 1271000
    num_agent_steps_trained: 1271000
    num_steps_sampled: 1271000
    num_steps_trained: 1271000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1271,34507.2,1271000,-28.715,-22,-41.6,287.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1272000
  custom_metrics: {}
  date: 2021-10-29_06-41-58
  done: false
  episode_len_mean: 289.27
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -28.927000000000135
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 2
  episodes_total: 4333
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3987577842751461
          cur_lr: 5.000000000000001e-05
          entropy: 1.053390167819129
          entropy_coeff: 0.009999999999999998
          kl: 0.03540198682546366
          policy_loss: -0.06614723238680098
          total_loss: 0.7468267136149936
          vf_explained_var: 0.43920889496803284
          vf_loss: 0.809391019327773
    num_agent_steps_sampled: 1272000
    num_agent_steps_trained: 1272000
    num_steps_sampled: 1272000
    num_steps_trained: 1272000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1272,34530.5,1272000,-28.927,-22.7,-41.6,289.27


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1273000
  custom_metrics: {}
  date: 2021-10-29_06-42-22
  done: false
  episode_len_mean: 290.9
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -29.090000000000142
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 4337
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5981366764127191
          cur_lr: 5.000000000000001e-05
          entropy: 1.007069902949863
          entropy_coeff: 0.009999999999999998
          kl: 0.00839349483309238
          policy_loss: 0.03325203019711706
          total_loss: 1.1115519828266567
          vf_explained_var: 0.39969342947006226
          vf_loss: 1.0833501723077563
    num_agent_steps_sampled: 1273000
    num_agent_steps_trained: 1273000
    num_steps_sampled: 1273000
    num_steps_trained: 1273000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1273,34554.6,1273000,-29.09,-22.7,-41.6,290.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1274000
  custom_metrics: {}
  date: 2021-10-29_06-42-44
  done: false
  episode_len_mean: 292.75
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -29.27500000000014
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 2
  episodes_total: 4339
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5981366764127191
          cur_lr: 5.000000000000001e-05
          entropy: 1.2503682600127326
          entropy_coeff: 0.009999999999999998
          kl: 0.009955870379616902
          policy_loss: -0.059445490025811724
          total_loss: 0.7570661236842473
          vf_explained_var: -0.03005828894674778
          vf_loss: 0.8230603311210871
    num_agent_steps_sampled: 1274000
    num_agent_steps_trained: 1274000
    num_steps_sampled: 1274000
    num_steps_trained: 127400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1274,34576.3,1274000,-29.275,-22.7,-41.6,292.75


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1275000
  custom_metrics: {}
  date: 2021-10-29_06-43-07
  done: false
  episode_len_mean: 294.95
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -29.49500000000015
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4342
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5981366764127191
          cur_lr: 5.000000000000001e-05
          entropy: 0.9294960942533281
          entropy_coeff: 0.009999999999999998
          kl: 0.011831987493962023
          policy_loss: -0.11084972330265574
          total_loss: 0.9321803490320841
          vf_explained_var: 0.5655319094657898
          vf_loss: 1.045247886578242
    num_agent_steps_sampled: 1275000
    num_agent_steps_trained: 1275000
    num_steps_sampled: 1275000
    num_steps_trained: 1275000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1275,34599.8,1275000,-29.495,-22.7,-41.6,294.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1276000
  custom_metrics: {}
  date: 2021-10-29_06-43-36
  done: false
  episode_len_mean: 295.26
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -29.52600000000015
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 4346
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5981366764127191
          cur_lr: 5.000000000000001e-05
          entropy: 0.8738597452640533
          entropy_coeff: 0.009999999999999998
          kl: 0.009465484161257996
          policy_loss: 0.06585252736177709
          total_loss: 1.1246792468759748
          vf_explained_var: 0.45139193534851074
          vf_loss: 1.0619036734104157
    num_agent_steps_sampled: 1276000
    num_agent_steps_trained: 1276000
    num_steps_sampled: 1276000
    num_steps_trained: 1276000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1276,34628,1276000,-29.526,-22.7,-41.6,295.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1277000
  custom_metrics: {}
  date: 2021-10-29_06-44-02
  done: false
  episode_len_mean: 296.02
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -29.602000000000153
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4349
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5981366764127191
          cur_lr: 5.000000000000001e-05
          entropy: 0.8601844628651937
          entropy_coeff: 0.009999999999999998
          kl: 0.020081504411074903
          policy_loss: -0.1219795725411839
          total_loss: 0.8515971001651552
          vf_explained_var: 0.6353224515914917
          vf_loss: 0.9701670434739854
    num_agent_steps_sampled: 1277000
    num_agent_steps_trained: 1277000
    num_steps_sampled: 1277000
    num_steps_trained: 1277000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1277,34653.9,1277000,-29.602,-22.7,-41.6,296.02




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1278000
  custom_metrics: {}
  date: 2021-10-29_06-44-45
  done: false
  episode_len_mean: 296.15
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -29.61500000000015
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 4353
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 1.0694818589422437
          entropy_coeff: 0.009999999999999998
          kl: 0.008115108450692708
          policy_loss: -0.03824528364671601
          total_loss: 1.222822564178043
          vf_explained_var: 0.20194485783576965
          vf_loss: 1.2644817458258735
    num_agent_steps_sampled: 1278000
    num_agent_steps_trained: 1278000
    num_steps_sampled: 1278000
    num_steps_trained: 1278000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1278,34697.7,1278000,-29.615,-22.7,-41.6,296.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1279000
  custom_metrics: {}
  date: 2021-10-29_06-45-10
  done: false
  episode_len_mean: 297.76
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -29.77600000000015
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4356
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 1.0165611267089845
          entropy_coeff: 0.009999999999999998
          kl: 0.008745908026158922
          policy_loss: 0.05155497276120716
          total_loss: 1.0637729316949844
          vf_explained_var: -0.22746115922927856
          vf_loss: 1.0145367078483105
    num_agent_steps_sampled: 1279000
    num_agent_steps_trained: 1279000
    num_steps_sampled: 1279000
    num_steps_trained: 1279000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1279,34722.3,1279000,-29.776,-22.7,-41.6,297.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1280000
  custom_metrics: {}
  date: 2021-10-29_06-45-33
  done: false
  episode_len_mean: 299.68
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -29.968000000000156
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4359
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 1.105395903189977
          entropy_coeff: 0.009999999999999998
          kl: 0.006918610448662024
          policy_loss: 0.0780580974287457
          total_loss: 0.6990864002042346
          vf_explained_var: 0.29006341099739075
          vf_loss: 0.6258748517682154
    num_agent_steps_sampled: 1280000
    num_agent_steps_trained: 1280000
    num_steps_sampled: 1280000
    num_steps_trained: 1280000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1280,34745,1280000,-29.968,-22.7,-41.6,299.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1281000
  custom_metrics: {}
  date: 2021-10-29_06-45-57
  done: false
  episode_len_mean: 300.8
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -30.080000000000158
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4362
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 1.0665327615208096
          entropy_coeff: 0.009999999999999998
          kl: 0.013844319892473806
          policy_loss: 0.03943506172961659
          total_loss: 0.890133085846901
          vf_explained_var: 0.10654735565185547
          vf_loss: 0.8489421591990524
    num_agent_steps_sampled: 1281000
    num_agent_steps_trained: 1281000
    num_steps_sampled: 1281000
    num_steps_trained: 1281000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1281,34769.6,1281000,-30.08,-22.7,-41.6,300.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1282000
  custom_metrics: {}
  date: 2021-10-29_06-46-26
  done: false
  episode_len_mean: 299.22
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -29.922000000000153
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 4366
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 0.8271909713745117
          entropy_coeff: 0.009999999999999998
          kl: 0.009537954330651625
          policy_loss: 0.04618232593768173
          total_loss: 0.9871106750435299
          vf_explained_var: 0.5051712989807129
          vf_loss: 0.9406427522500356
    num_agent_steps_sampled: 1282000
    num_agent_steps_trained: 1282000
    num_steps_sampled: 1282000
    num_steps_trained: 1282000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1282,34798.4,1282000,-29.922,-22.7,-41.6,299.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1283000
  custom_metrics: {}
  date: 2021-10-29_06-46-51
  done: false
  episode_len_mean: 299.09
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -29.90900000000015
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4369
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 1.1911635703510708
          entropy_coeff: 0.009999999999999998
          kl: 0.007532139232079752
          policy_loss: 0.06739973119563526
          total_loss: 0.8968125654591454
          vf_explained_var: -0.005355050787329674
          vf_loss: 0.8345665960676141
    num_agent_steps_sampled: 1283000
    num_agent_steps_trained: 1283000
    num_steps_sampled: 1283000
    num_steps_trained: 1283000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1283,34823.5,1283000,-29.909,-22.7,-41.6,299.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1284000
  custom_metrics: {}
  date: 2021-10-29_06-47-20
  done: false
  episode_len_mean: 299.51
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -29.951000000000157
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 4373
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 0.8930276877350277
          entropy_coeff: 0.009999999999999998
          kl: 0.0057922357199155766
          policy_loss: -0.005329943034383985
          total_loss: 1.3380189220110574
          vf_explained_var: 0.25349852442741394
          vf_loss: 1.3470823129018148
    num_agent_steps_sampled: 1284000
    num_agent_steps_trained: 1284000
    num_steps_sampled: 1284000
    num_steps_trained: 12840

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1284,34852.5,1284000,-29.951,-22.7,-41.6,299.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1285000
  custom_metrics: {}
  date: 2021-10-29_06-47-48
  done: false
  episode_len_mean: 298.63
  episode_media: {}
  episode_reward_max: -22.700000000000053
  episode_reward_mean: -29.863000000000156
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4376
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 1.006371412674586
          entropy_coeff: 0.009999999999999998
          kl: 0.016993088511913494
          policy_loss: -0.001924495150645574
          total_loss: 0.6700218670898014
          vf_explained_var: 0.48308369517326355
          vf_loss: 0.6667637904485066
    num_agent_steps_sampled: 1285000
    num_agent_steps_trained: 1285000
    num_steps_sampled: 1285000
    num_steps_trained: 1285000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1285,34880.3,1285000,-29.863,-22.7,-41.6,298.63




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1286000
  custom_metrics: {}
  date: 2021-10-29_06-48-30
  done: false
  episode_len_mean: 297.72
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.77200000000015
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 4380
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 1.0453189028633965
          entropy_coeff: 0.009999999999999998
          kl: 0.013162884636677082
          policy_loss: -0.032244772795173855
          total_loss: 1.4400711562898425
          vf_explained_var: 0.19485589861869812
          vf_loss: 1.4709593097368876
    num_agent_steps_sampled: 1286000
    num_agent_steps_trained: 1286000
    num_steps_sampled: 1286000
    num_steps_trained: 1286000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1286,34922.5,1286000,-29.772,-22.4,-41.6,297.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1287000
  custom_metrics: {}
  date: 2021-10-29_06-48-55
  done: false
  episode_len_mean: 299.13
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.913000000000157
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4383
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 1.180421761671702
          entropy_coeff: 0.009999999999999998
          kl: 0.007993140440296444
          policy_loss: 0.03648788192205959
          total_loss: 1.1844135661919912
          vf_explained_var: -0.19950208067893982
          vf_loss: 1.1525584078497357
    num_agent_steps_sampled: 1287000
    num_agent_steps_trained: 1287000
    num_steps_sampled: 1287000
    num_steps_trained: 1287000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1287,34947.4,1287000,-29.913,-22.4,-41.6,299.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1288000
  custom_metrics: {}
  date: 2021-10-29_06-49-23
  done: false
  episode_len_mean: 298.75
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.875000000000156
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4386
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 1.0146988352139792
          entropy_coeff: 0.009999999999999998
          kl: 0.01428903158449357
          policy_loss: -0.015039249137043954
          total_loss: 0.6693856510851118
          vf_explained_var: 0.5139588117599487
          vf_loss: 0.6817516866657469
    num_agent_steps_sampled: 1288000
    num_agent_steps_trained: 1288000
    num_steps_sampled: 1288000
    num_steps_trained: 1288000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1288,34975.5,1288000,-29.875,-22.4,-41.6,298.75


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1289000
  custom_metrics: {}
  date: 2021-10-29_06-49-47
  done: false
  episode_len_mean: 299.4
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.940000000000158
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4389
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 1.0854372594091628
          entropy_coeff: 0.009999999999999998
          kl: 0.009671806876945589
          policy_loss: 0.03126353944341342
          total_loss: 0.8069836702611711
          vf_explained_var: 0.2546962797641754
          vf_loss: 0.777896904034747
    num_agent_steps_sampled: 1289000
    num_agent_steps_trained: 1289000
    num_steps_sampled: 1289000
    num_steps_trained: 1289000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1289,34998.9,1289000,-29.94,-22.4,-41.6,299.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1290000
  custom_metrics: {}
  date: 2021-10-29_06-50-12
  done: false
  episode_len_mean: 301.45
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.14500000000016
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4392
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 1.020865547657013
          entropy_coeff: 0.009999999999999998
          kl: 0.008340063862342362
          policy_loss: -0.008200763662656149
          total_loss: 1.0194855193297068
          vf_explained_var: 0.21946710348129272
          vf_loss: 1.0304121964507633
    num_agent_steps_sampled: 1290000
    num_agent_steps_trained: 1290000
    num_steps_sampled: 1290000
    num_steps_trained: 1290000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1290,35023.6,1290000,-30.145,-22.4,-41.6,301.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1291000
  custom_metrics: {}
  date: 2021-10-29_06-50-38
  done: false
  episode_len_mean: 301.84
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.18400000000016
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 4396
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 1.2771155516306558
          entropy_coeff: 0.009999999999999998
          kl: 0.013814138113841443
          policy_loss: -0.12552841578920682
          total_loss: 1.4661308526992798
          vf_explained_var: 0.28199899196624756
          vf_loss: 1.5920363042089674
    num_agent_steps_sampled: 1291000
    num_agent_steps_trained: 1291000
    num_steps_sampled: 1291000
    num_steps_trained: 1291000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1291,35050,1291000,-30.184,-22.4,-41.6,301.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1292000
  custom_metrics: {}
  date: 2021-10-29_06-51-04
  done: false
  episode_len_mean: 302.61
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.261000000000163
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4399
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8972050146190786
          cur_lr: 5.000000000000001e-05
          entropy: 0.9759819891717699
          entropy_coeff: 0.009999999999999998
          kl: 0.030370309836028633
          policy_loss: 0.08202266370256742
          total_loss: 0.7882768219543828
          vf_explained_var: 0.7172138690948486
          vf_loss: 0.6887655837668313
    num_agent_steps_sampled: 1292000
    num_agent_steps_trained: 1292000
    num_steps_sampled: 1292000
    num_steps_trained: 1292000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1292,35076.1,1292000,-30.261,-22.4,-41.6,302.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1293000
  custom_metrics: {}
  date: 2021-10-29_06-51-30
  done: false
  episode_len_mean: 303.48
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.348000000000162
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4402
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.345807521928618
          cur_lr: 5.000000000000001e-05
          entropy: 1.0521396630340152
          entropy_coeff: 0.009999999999999998
          kl: 0.012696894227232115
          policy_loss: -0.06946796634131008
          total_loss: 1.2528809256023832
          vf_explained_var: 0.24867159128189087
          vf_loss: 1.3157827271355522
    num_agent_steps_sampled: 1293000
    num_agent_steps_trained: 1293000
    num_steps_sampled: 1293000
    num_steps_trained: 1293000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1293,35102.4,1293000,-30.348,-22.4,-41.6,303.48


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1294000
  custom_metrics: {}
  date: 2021-10-29_06-51-59
  done: false
  episode_len_mean: 303.69
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.36900000000016
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 4406
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.345807521928618
          cur_lr: 5.000000000000001e-05
          entropy: 0.7567926595608393
          entropy_coeff: 0.009999999999999998
          kl: 0.012376726305380873
          policy_loss: 0.03309385536445512
          total_loss: 0.9913332339790132
          vf_explained_var: 0.4465671479701996
          vf_loss: 0.9491506069898605
    num_agent_steps_sampled: 1294000
    num_agent_steps_trained: 1294000
    num_steps_sampled: 1294000
    num_steps_trained: 1294000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1294,35131.3,1294000,-30.369,-22.4,-41.6,303.69




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1295000
  custom_metrics: {}
  date: 2021-10-29_06-52-42
  done: false
  episode_len_mean: 303.02
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.302000000000156
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 4
  episodes_total: 4410
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.345807521928618
          cur_lr: 5.000000000000001e-05
          entropy: 0.9907379660341474
          entropy_coeff: 0.009999999999999998
          kl: 0.010405595009489035
          policy_loss: 0.021115239668223592
          total_loss: 1.0339858929316204
          vf_explained_var: 0.5269712805747986
          vf_loss: 1.008774110343721
    num_agent_steps_sampled: 1295000
    num_agent_steps_trained: 1295000
    num_steps_sampled: 1295000
    num_steps_trained: 1295000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1295,35173.6,1295000,-30.302,-22.4,-41.6,303.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1296000
  custom_metrics: {}
  date: 2021-10-29_06-53-08
  done: false
  episode_len_mean: 303.81
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.381000000000164
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4413
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.345807521928618
          cur_lr: 5.000000000000001e-05
          entropy: 1.1811330305205452
          entropy_coeff: 0.009999999999999998
          kl: 0.017483408749322395
          policy_loss: 0.04197820954852634
          total_loss: 0.9228361931112078
          vf_explained_var: 0.31117263436317444
          vf_loss: 0.8691400123967065
    num_agent_steps_sampled: 1296000
    num_agent_steps_trained: 1296000
    num_steps_sampled: 1296000
    num_steps_trained: 1296000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1296,35199.6,1296000,-30.381,-22.4,-41.6,303.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1297000
  custom_metrics: {}
  date: 2021-10-29_06-53-32
  done: false
  episode_len_mean: 304.13
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.41300000000016
  episode_reward_min: -41.60000000000032
  episodes_this_iter: 3
  episodes_total: 4416
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.345807521928618
          cur_lr: 5.000000000000001e-05
          entropy: 1.2683138575818804
          entropy_coeff: 0.009999999999999998
          kl: 0.029871095568735247
          policy_loss: 0.07407007991439767
          total_loss: 1.0879772269063526
          vf_explained_var: 0.17747430503368378
          vf_loss: 0.9863895401358604
    num_agent_steps_sampled: 1297000
    num_agent_steps_trained: 1297000
    num_steps_sampled: 1297000
    num_steps_trained: 1297000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1297,35224.1,1297000,-30.413,-22.4,-41.6,304.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1298000
  custom_metrics: {}
  date: 2021-10-29_06-54-00
  done: false
  episode_len_mean: 303.09
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.30900000000016
  episode_reward_min: -40.80000000000031
  episodes_this_iter: 3
  episodes_total: 4419
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0187112828929266
          cur_lr: 5.000000000000001e-05
          entropy: 1.0233216298951042
          entropy_coeff: 0.009999999999999998
          kl: 0.009168415718028516
          policy_loss: -0.019752647231022515
          total_loss: 1.1401147431797451
          vf_explained_var: 0.3625561594963074
          vf_loss: 1.1515922082795038
    num_agent_steps_sampled: 1298000
    num_agent_steps_trained: 1298000
    num_steps_sampled: 1298000
    num_steps_trained: 1298000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1298,35251.7,1298000,-30.309,-22.4,-40.8,303.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1299000
  custom_metrics: {}
  date: 2021-10-29_06-54-28
  done: false
  episode_len_mean: 304.12
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.412000000000166
  episode_reward_min: -40.80000000000031
  episodes_this_iter: 4
  episodes_total: 4423
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0187112828929266
          cur_lr: 5.000000000000001e-05
          entropy: 0.8940698544184367
          entropy_coeff: 0.009999999999999998
          kl: 0.006475248116628605
          policy_loss: 0.052982790892322856
          total_loss: 0.8777993275059595
          vf_explained_var: 0.2371044009923935
          vf_loss: 0.8206855654716492
    num_agent_steps_sampled: 1299000
    num_agent_steps_trained: 1299000
    num_steps_sampled: 1299000
    num_steps_trained: 1299000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1299,35279.5,1299000,-30.412,-22.4,-40.8,304.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1300000
  custom_metrics: {}
  date: 2021-10-29_06-54-57
  done: false
  episode_len_mean: 303.17
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.317000000000164
  episode_reward_min: -40.80000000000031
  episodes_this_iter: 4
  episodes_total: 4427
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0187112828929266
          cur_lr: 5.000000000000001e-05
          entropy: 0.9760357499122619
          entropy_coeff: 0.009999999999999998
          kl: 0.005205604289669531
          policy_loss: 0.031497468302647276
          total_loss: 1.152748402622011
          vf_explained_var: 0.28468385338783264
          vf_loss: 1.1205026706059773
    num_agent_steps_sampled: 1300000
    num_agent_steps_trained: 1300000
    num_steps_sampled: 1300000
    num_steps_trained: 1300000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1300,35309.2,1300000,-30.317,-22.4,-40.8,303.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1301000
  custom_metrics: {}
  date: 2021-10-29_06-55-26
  done: false
  episode_len_mean: 301.94
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.19400000000016
  episode_reward_min: -40.80000000000031
  episodes_this_iter: 3
  episodes_total: 4430
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0187112828929266
          cur_lr: 5.000000000000001e-05
          entropy: 0.8672918571366204
          entropy_coeff: 0.009999999999999998
          kl: 0.0032520812860563336
          policy_loss: 0.014011032672391998
          total_loss: 0.8325152887238396
          vf_explained_var: 0.3352242708206177
          vf_loss: 0.8206121583779653
    num_agent_steps_sampled: 1301000
    num_agent_steps_trained: 1301000
    num_steps_sampled: 1301000
    num_steps_trained: 1301000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1301,35338.2,1301000,-30.194,-22.4,-40.8,301.94


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1302000
  custom_metrics: {}
  date: 2021-10-29_06-55-51
  done: false
  episode_len_mean: 299.53
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.953000000000152
  episode_reward_min: -40.80000000000031
  episodes_this_iter: 4
  episodes_total: 4434
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0093556414464633
          cur_lr: 5.000000000000001e-05
          entropy: 0.959830868906445
          entropy_coeff: 0.009999999999999998
          kl: 0.010799035533622556
          policy_loss: 0.015614137550195058
          total_loss: 0.951843574974272
          vf_explained_var: 0.17773276567459106
          vf_loss: 0.9349276774459415
    num_agent_steps_sampled: 1302000
    num_agent_steps_trained: 1302000
    num_steps_sampled: 1302000
    num_steps_trained: 1302000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1302,35362.6,1302000,-29.953,-22.4,-40.8,299.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1303000
  custom_metrics: {}
  date: 2021-10-29_06-56-15
  done: false
  episode_len_mean: 299.59
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.959000000000156
  episode_reward_min: -40.80000000000031
  episodes_this_iter: 3
  episodes_total: 4437
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0093556414464633
          cur_lr: 5.000000000000001e-05
          entropy: 1.215603518486023
          entropy_coeff: 0.009999999999999998
          kl: 0.007347748202515526
          policy_loss: 0.08610911228590541
          total_loss: 0.9445878247419993
          vf_explained_var: -0.09730120748281479
          vf_loss: 0.8632182610531648
    num_agent_steps_sampled: 1303000
    num_agent_steps_trained: 1303000
    num_steps_sampled: 1303000
    num_steps_trained: 1303000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1303,35387.1,1303000,-29.959,-22.4,-40.8,299.59




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1304000
  custom_metrics: {}
  date: 2021-10-29_06-56-58
  done: false
  episode_len_mean: 296.84
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.684000000000154
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 3
  episodes_total: 4440
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0093556414464633
          cur_lr: 5.000000000000001e-05
          entropy: 0.8801297002368503
          entropy_coeff: 0.009999999999999998
          kl: 0.002246402489576048
          policy_loss: -0.03133576015631358
          total_loss: 1.3289365536636777
          vf_explained_var: 0.09281409531831741
          vf_loss: 1.366806197166443
    num_agent_steps_sampled: 1304000
    num_agent_steps_trained: 1304000
    num_steps_sampled: 1304000
    num_steps_trained: 1304000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1304,35430,1304000,-29.684,-22.4,-39.4,296.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1305000
  custom_metrics: {}
  date: 2021-10-29_06-57-27
  done: false
  episode_len_mean: 296.14
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.614000000000157
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 4
  episodes_total: 4444
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5046778207232316
          cur_lr: 5.000000000000001e-05
          entropy: 1.0811594665050506
          entropy_coeff: 0.009999999999999998
          kl: 0.016081395974025466
          policy_loss: 0.04464811806877454
          total_loss: 1.1671201090017955
          vf_explained_var: 0.4953707456588745
          vf_loss: 1.1251676625675626
    num_agent_steps_sampled: 1305000
    num_agent_steps_trained: 1305000
    num_steps_sampled: 1305000
    num_steps_trained: 1305000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1305,35458.7,1305000,-29.614,-22.4,-39.4,296.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1306000
  custom_metrics: {}
  date: 2021-10-29_06-57-56
  done: false
  episode_len_mean: 296.32
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.632000000000158
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 3
  episodes_total: 4447
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5046778207232316
          cur_lr: 5.000000000000001e-05
          entropy: 1.028497534328037
          entropy_coeff: 0.009999999999999998
          kl: 0.004984760218578938
          policy_loss: 0.012323185553153355
          total_loss: 0.956596573193868
          vf_explained_var: 0.26130905747413635
          vf_loss: 0.9520426740248998
    num_agent_steps_sampled: 1306000
    num_agent_steps_trained: 1306000
    num_steps_sampled: 1306000
    num_steps_trained: 1306000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1306,35487.3,1306000,-29.632,-22.4,-39.4,296.32


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1307000
  custom_metrics: {}
  date: 2021-10-29_06-58-22
  done: false
  episode_len_mean: 294.56
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.456000000000152
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 4
  episodes_total: 4451
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2523389103616158
          cur_lr: 5.000000000000001e-05
          entropy: 1.0186371697319878
          entropy_coeff: 0.009999999999999998
          kl: 0.025884105430021027
          policy_loss: 0.0007794189370340771
          total_loss: 0.9621141261524624
          vf_explained_var: 0.4301605522632599
          vf_loss: 0.9649895065360599
    num_agent_steps_sampled: 1307000
    num_agent_steps_trained: 1307000
    num_steps_sampled: 1307000
    num_steps_trained: 1307000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1307,35513.8,1307000,-29.456,-22.4,-39.4,294.56


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1308000
  custom_metrics: {}
  date: 2021-10-29_06-58-51
  done: false
  episode_len_mean: 295.29
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.529000000000146
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 3
  episodes_total: 4454
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3785083655424238
          cur_lr: 5.000000000000001e-05
          entropy: 0.8868770917256673
          entropy_coeff: 0.009999999999999998
          kl: 0.023750572828268406
          policy_loss: -0.05099431375662486
          total_loss: 0.8742070840464697
          vf_explained_var: 0.3339618444442749
          vf_loss: 0.9250803901089563
    num_agent_steps_sampled: 1308000
    num_agent_steps_trained: 1308000
    num_steps_sampled: 1308000
    num_steps_trained: 1308000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1308,35542.3,1308000,-29.529,-22.4,-39.4,295.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1309000
  custom_metrics: {}
  date: 2021-10-29_06-59-17
  done: false
  episode_len_mean: 293.12
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.312000000000147
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 4
  episodes_total: 4458
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5677625483136356
          cur_lr: 5.000000000000001e-05
          entropy: 0.952374999390708
          entropy_coeff: 0.009999999999999998
          kl: 0.017196100570010406
          policy_loss: 0.07738577959438166
          total_loss: 0.8111134472820494
          vf_explained_var: 0.4349651336669922
          vf_loss: 0.7334881173239813
    num_agent_steps_sampled: 1309000
    num_agent_steps_trained: 1309000
    num_steps_sampled: 1309000
    num_steps_trained: 1309000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1309,35569.2,1309000,-29.312,-22.4,-39.4,293.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1310000
  custom_metrics: {}
  date: 2021-10-29_06-59-44
  done: false
  episode_len_mean: 291.82
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.182000000000144
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 3
  episodes_total: 4461
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5677625483136356
          cur_lr: 5.000000000000001e-05
          entropy: 1.0456608321931626
          entropy_coeff: 0.009999999999999998
          kl: 0.007522535763927686
          policy_loss: -0.10734389871358871
          total_loss: 1.1904344426261053
          vf_explained_var: 0.18507541716098785
          vf_loss: 1.3039639287524754
    num_agent_steps_sampled: 1310000
    num_agent_steps_trained: 1310000
    num_steps_sampled: 1310000
    num_steps_trained: 1310000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1310,35595.6,1310000,-29.182,-22.4,-39.4,291.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1311000
  custom_metrics: {}
  date: 2021-10-29_07-00-09
  done: false
  episode_len_mean: 293.01
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.301000000000148
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 4
  episodes_total: 4465
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5677625483136356
          cur_lr: 5.000000000000001e-05
          entropy: 1.0465422067377301
          entropy_coeff: 0.009999999999999998
          kl: 0.013235273654614549
          policy_loss: 0.04773873061769539
          total_loss: 0.8870228436258104
          vf_explained_var: 0.4973343014717102
          vf_loss: 0.8422350505987803
    num_agent_steps_sampled: 1311000
    num_agent_steps_trained: 1311000
    num_steps_sampled: 1311000
    num_steps_trained: 1311000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1311,35621,1311000,-29.301,-22.4,-39.4,293.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1312000
  custom_metrics: {}
  date: 2021-10-29_07-00-35
  done: false
  episode_len_mean: 292.26
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -29.22600000000015
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 3
  episodes_total: 4468
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5677625483136356
          cur_lr: 5.000000000000001e-05
          entropy: 1.0041979988416037
          entropy_coeff: 0.009999999999999998
          kl: 0.021968033517607132
          policy_loss: 0.025609593838453293
          total_loss: 0.5365869022077985
          vf_explained_var: 0.6625929474830627
          vf_loss: 0.5085466665733192
    num_agent_steps_sampled: 1312000
    num_agent_steps_trained: 1312000
    num_steps_sampled: 1312000
    num_steps_trained: 1312000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1312,35646.8,1312000,-29.226,-22.4,-39.4,292.26




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1313000
  custom_metrics: {}
  date: 2021-10-29_07-01-17
  done: false
  episode_len_mean: 291.73
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -29.173000000000147
  episode_reward_min: -39.40000000000029
  episodes_this_iter: 3
  episodes_total: 4471
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8516438224704534
          cur_lr: 5.000000000000001e-05
          entropy: 0.9489291389783223
          entropy_coeff: 0.009999999999999998
          kl: 0.007908666158888147
          policy_loss: -0.03562985575861401
          total_loss: 0.875902901093165
          vf_explained_var: 0.5172697305679321
          vf_loss: 0.9142866757180955
    num_agent_steps_sampled: 1313000
    num_agent_steps_trained: 1313000
    num_steps_sampled: 1313000
    num_steps_trained: 1313000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1313,35688.8,1313000,-29.173,-22.3,-39.4,291.73


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1314000
  custom_metrics: {}
  date: 2021-10-29_07-01-43
  done: false
  episode_len_mean: 294.8
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -29.48000000000015
  episode_reward_min: -48.00000000000041
  episodes_this_iter: 4
  episodes_total: 4475
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8516438224704534
          cur_lr: 5.000000000000001e-05
          entropy: 1.0485197914971245
          entropy_coeff: 0.009999999999999998
          kl: 0.007274493733941906
          policy_loss: -0.03596811832653152
          total_loss: 0.9472547385427686
          vf_explained_var: 0.17726419866085052
          vf_loss: 0.9875127772490183
    num_agent_steps_sampled: 1314000
    num_agent_steps_trained: 1314000
    num_steps_sampled: 1314000
    num_steps_trained: 1314000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1314,35714.5,1314000,-29.48,-22.3,-48,294.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1315000
  custom_metrics: {}
  date: 2021-10-29_07-02-05
  done: false
  episode_len_mean: 296.07
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -29.607000000000156
  episode_reward_min: -48.00000000000041
  episodes_this_iter: 2
  episodes_total: 4477
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8516438224704534
          cur_lr: 5.000000000000001e-05
          entropy: 1.144974766837226
          entropy_coeff: 0.009999999999999998
          kl: 0.004086769352841676
          policy_loss: -0.11157862941424052
          total_loss: 0.8824367493391037
          vf_explained_var: 0.034080423414707184
          vf_loss: 1.001984648903211
    num_agent_steps_sampled: 1315000
    num_agent_steps_trained: 1315000
    num_steps_sampled: 1315000
    num_steps_trained: 1315000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1315,35736.8,1315000,-29.607,-22.3,-48,296.07


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1316000
  custom_metrics: {}
  date: 2021-10-29_07-02-29
  done: false
  episode_len_mean: 297.79
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -29.779000000000156
  episode_reward_min: -48.00000000000041
  episodes_this_iter: 3
  episodes_total: 4480
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4258219112352267
          cur_lr: 5.000000000000001e-05
          entropy: 1.0965189152293735
          entropy_coeff: 0.009999999999999998
          kl: 0.029164496403395686
          policy_loss: -0.06784310369855828
          total_loss: 1.0556128614478641
          vf_explained_var: 0.337705135345459
          vf_loss: 1.1220022645261554
    num_agent_steps_sampled: 1316000
    num_agent_steps_trained: 1316000
    num_steps_sampled: 1316000
    num_steps_trained: 1316000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1316,35760.6,1316000,-29.779,-22.3,-48,297.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1317000
  custom_metrics: {}
  date: 2021-10-29_07-02-56
  done: false
  episode_len_mean: 296.52
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -29.652000000000154
  episode_reward_min: -48.00000000000041
  episodes_this_iter: 4
  episodes_total: 4484
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6387328668528401
          cur_lr: 5.000000000000001e-05
          entropy: 1.0524159140057034
          entropy_coeff: 0.009999999999999998
          kl: 0.009579069647170889
          policy_loss: 0.04684477191832331
          total_loss: 0.7215276082356771
          vf_explained_var: 0.7057174444198608
          vf_loss: 0.6790885296132829
    num_agent_steps_sampled: 1317000
    num_agent_steps_trained: 1317000
    num_steps_sampled: 1317000
    num_steps_trained: 1317000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1317,35787.9,1317000,-29.652,-22.3,-48,296.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1318000
  custom_metrics: {}
  date: 2021-10-29_07-03-15
  done: false
  episode_len_mean: 300.63
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -30.063000000000155
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 2
  episodes_total: 4486
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6387328668528401
          cur_lr: 5.000000000000001e-05
          entropy: 1.38706263701121
          entropy_coeff: 0.009999999999999998
          kl: 0.01034903404461684
          policy_loss: 0.12106154792838626
          total_loss: 0.7019409080346425
          vf_explained_var: -0.13361085951328278
          vf_loss: 0.5881397099130683
    num_agent_steps_sampled: 1318000
    num_agent_steps_trained: 1318000
    num_steps_sampled: 1318000
    num_steps_trained: 1318000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1318,35806.9,1318000,-30.063,-22.3,-52.1,300.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1319000
  custom_metrics: {}
  date: 2021-10-29_07-03-39
  done: false
  episode_len_mean: 300.4
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -30.040000000000155
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4489
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6387328668528401
          cur_lr: 5.000000000000001e-05
          entropy: 1.0926766177018483
          entropy_coeff: 0.009999999999999998
          kl: 0.008149623187982909
          policy_loss: 0.0535323065188196
          total_loss: 0.8254588408602609
          vf_explained_var: 0.46486344933509827
          vf_loss: 0.777647856498758
    num_agent_steps_sampled: 1319000
    num_agent_steps_trained: 1319000
    num_steps_sampled: 1319000
    num_steps_trained: 1319000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1319,35830.5,1319000,-30.04,-22.3,-52.1,300.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1320000
  custom_metrics: {}
  date: 2021-10-29_07-04-02
  done: false
  episode_len_mean: 302.14
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -30.21400000000016
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4492
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6387328668528401
          cur_lr: 5.000000000000001e-05
          entropy: 1.1187303013271757
          entropy_coeff: 0.009999999999999998
          kl: 0.015111124705777948
          policy_loss: 0.08370726332068443
          total_loss: 0.7921686427460777
          vf_explained_var: -0.0957515612244606
          vf_loss: 0.7099967100554042
    num_agent_steps_sampled: 1320000
    num_agent_steps_trained: 1320000
    num_steps_sampled: 1320000
    num_steps_trained: 1320000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1320,35853.1,1320000,-30.214,-22.3,-52.1,302.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1321000
  custom_metrics: {}
  date: 2021-10-29_07-04-25
  done: false
  episode_len_mean: 302.6
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -30.260000000000158
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 2
  episodes_total: 4494
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6387328668528401
          cur_lr: 5.000000000000001e-05
          entropy: 1.1483228060934279
          entropy_coeff: 0.009999999999999998
          kl: 0.011769992759898349
          policy_loss: -0.13613848057058123
          total_loss: 0.6880272487799327
          vf_explained_var: 0.38360628485679626
          vf_loss: 0.828131085054742
    num_agent_steps_sampled: 1321000
    num_agent_steps_trained: 1321000
    num_steps_sampled: 1321000
    num_steps_trained: 1321000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1321,35876.1,1321000,-30.26,-22.3,-52.1,302.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1322000
  custom_metrics: {}
  date: 2021-10-29_07-04-45
  done: false
  episode_len_mean: 306.96
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -30.69600000000016
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4497
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6387328668528401
          cur_lr: 5.000000000000001e-05
          entropy: 1.3803704513443842
          entropy_coeff: 0.009999999999999998
          kl: 0.011388304389098951
          policy_loss: 0.08418264620833926
          total_loss: 0.9486060947179794
          vf_explained_var: 0.17365816235542297
          vf_loss: 0.8709530759602785
    num_agent_steps_sampled: 1322000
    num_agent_steps_trained: 1322000
    num_steps_sampled: 1322000
    num_steps_trained: 1322000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1322,35896.6,1322000,-30.696,-22.3,-52.1,306.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1323000
  custom_metrics: {}
  date: 2021-10-29_07-05-05
  done: false
  episode_len_mean: 308.89
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -30.88900000000017
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 2
  episodes_total: 4499
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6387328668528401
          cur_lr: 5.000000000000001e-05
          entropy: 1.4214389204978943
          entropy_coeff: 0.009999999999999998
          kl: 0.010265005434023837
          policy_loss: -0.012904479768541125
          total_loss: 0.9636419375737508
          vf_explained_var: 0.27623122930526733
          vf_loss: 0.9842042048772176
    num_agent_steps_sampled: 1323000
    num_agent_steps_trained: 1323000
    num_steps_sampled: 1323000
    num_steps_trained: 1323000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1323,35916.3,1323000,-30.889,-22.3,-52.1,308.89




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1324000
  custom_metrics: {}
  date: 2021-10-29_07-05-48
  done: false
  episode_len_mean: 309.62
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -30.962000000000174
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 4
  episodes_total: 4503
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6387328668528401
          cur_lr: 5.000000000000001e-05
          entropy: 1.1598814434475369
          entropy_coeff: 0.009999999999999998
          kl: 0.013720147915027308
          policy_loss: -0.002790735827551948
          total_loss: 0.9898915487858985
          vf_explained_var: 0.4792960584163666
          vf_loss: 0.9955175889862908
    num_agent_steps_sampled: 1324000
    num_agent_steps_trained: 1324000
    num_steps_sampled: 1324000
    num_steps_trained: 1324000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1324,35959.5,1324000,-30.962,-22.3,-52.1,309.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1325000
  custom_metrics: {}
  date: 2021-10-29_07-06-12
  done: false
  episode_len_mean: 311.26
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -31.126000000000175
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4506
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6387328668528401
          cur_lr: 5.000000000000001e-05
          entropy: 1.4168699635399713
          entropy_coeff: 0.009999999999999998
          kl: 0.039047228243168614
          policy_loss: 0.11021683911482492
          total_loss: 1.0693621569209628
          vf_explained_var: 0.4353422522544861
          vf_loss: 0.94837327218718
    num_agent_steps_sampled: 1325000
    num_agent_steps_trained: 1325000
    num_steps_sampled: 1325000
    num_steps_trained: 1325000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1325,35983.4,1325000,-31.126,-22.3,-52.1,311.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1326000
  custom_metrics: {}
  date: 2021-10-29_07-06-36
  done: false
  episode_len_mean: 312.14
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -31.21400000000018
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 2
  episodes_total: 4508
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.95809930027926
          cur_lr: 5.000000000000001e-05
          entropy: 1.3285819172859192
          entropy_coeff: 0.009999999999999998
          kl: 0.014895058403506904
          policy_loss: -0.06223642097579108
          total_loss: 0.7539342358708382
          vf_explained_var: 0.48468080163002014
          vf_loss: 0.8151855359474818
    num_agent_steps_sampled: 1326000
    num_agent_steps_trained: 1326000
    num_steps_sampled: 1326000
    num_steps_trained: 1326000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1326,36007.1,1326000,-31.214,-22.3,-52.1,312.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1327000
  custom_metrics: {}
  date: 2021-10-29_07-06-59
  done: false
  episode_len_mean: 313.96
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -31.396000000000182
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4511
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.95809930027926
          cur_lr: 5.000000000000001e-05
          entropy: 1.372455616792043
          entropy_coeff: 0.009999999999999998
          kl: 0.018533887605609402
          policy_loss: -0.11081295410792033
          total_loss: 1.3293014367421467
          vf_explained_var: 0.11509312689304352
          vf_loss: 1.436081635951996
    num_agent_steps_sampled: 1327000
    num_agent_steps_trained: 1327000
    num_steps_sampled: 1327000
    num_steps_trained: 1327000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1327,36030,1327000,-31.396,-22.3,-52.1,313.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1328000
  custom_metrics: {}
  date: 2021-10-29_07-07-22
  done: false
  episode_len_mean: 315.66
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -31.566000000000177
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4514
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.95809930027926
          cur_lr: 5.000000000000001e-05
          entropy: 1.4445106943448385
          entropy_coeff: 0.009999999999999998
          kl: 0.008589538201553873
          policy_loss: -0.042078807950019836
          total_loss: 0.8789964043431812
          vf_explained_var: 0.19329307973384857
          vf_loss: 0.9272906893450353
    num_agent_steps_sampled: 1328000
    num_agent_steps_trained: 1328000
    num_steps_sampled: 1328000
    num_steps_trained: 1328000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1328,36053.5,1328000,-31.566,-22.3,-52.1,315.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1329000
  custom_metrics: {}
  date: 2021-10-29_07-07-46
  done: false
  episode_len_mean: 315.98
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -31.598000000000187
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4517
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.95809930027926
          cur_lr: 5.000000000000001e-05
          entropy: 1.4524966345893011
          entropy_coeff: 0.009999999999999998
          kl: 0.009805953380649888
          policy_loss: -0.027357227272457547
          total_loss: 0.8669377273983425
          vf_explained_var: 0.40722528100013733
          vf_loss: 0.8994248376952277
    num_agent_steps_sampled: 1329000
    num_agent_steps_trained: 1329000
    num_steps_sampled: 1329000
    num_steps_trained: 1329000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1329,36077.6,1329000,-31.598,-22.3,-52.1,315.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1330000
  custom_metrics: {}
  date: 2021-10-29_07-08-11
  done: false
  episode_len_mean: 317.2
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -31.720000000000187
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4520
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.95809930027926
          cur_lr: 5.000000000000001e-05
          entropy: 1.447242169910007
          entropy_coeff: 0.009999999999999998
          kl: 0.00809582726874554
          policy_loss: -0.058459359572993384
          total_loss: 0.9807071185774273
          vf_explained_var: 0.23767831921577454
          vf_loss: 1.0458822972244686
    num_agent_steps_sampled: 1330000
    num_agent_steps_trained: 1330000
    num_steps_sampled: 1330000
    num_steps_trained: 1330000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1330,36102.2,1330000,-31.72,-22.3,-52.1,317.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1331000
  custom_metrics: {}
  date: 2021-10-29_07-08-31
  done: false
  episode_len_mean: 320.79
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -32.079000000000185
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4523
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.95809930027926
          cur_lr: 5.000000000000001e-05
          entropy: 1.6217775993876986
          entropy_coeff: 0.009999999999999998
          kl: 0.007439912822375542
          policy_loss: 0.05669411437378989
          total_loss: 0.6985757990015877
          vf_explained_var: -0.04231512174010277
          vf_loss: 0.6509712858332528
    num_agent_steps_sampled: 1331000
    num_agent_steps_trained: 1331000
    num_steps_sampled: 1331000
    num_steps_trained: 1331000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1331,36122.6,1331000,-32.079,-22.3,-52.1,320.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1332000
  custom_metrics: {}
  date: 2021-10-29_07-08-56
  done: false
  episode_len_mean: 321.86
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -32.18600000000019
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4526
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.95809930027926
          cur_lr: 5.000000000000001e-05
          entropy: 1.6005011823442248
          entropy_coeff: 0.009999999999999998
          kl: 0.01151036437975803
          policy_loss: 0.07839301841126547
          total_loss: 1.0387633545531167
          vf_explained_var: 0.3165750801563263
          vf_loss: 0.9653472820917766
    num_agent_steps_sampled: 1332000
    num_agent_steps_trained: 1332000
    num_steps_sampled: 1332000
    num_steps_trained: 1332000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1332,36147.3,1332000,-32.186,-22.3,-52.1,321.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1333000
  custom_metrics: {}
  date: 2021-10-29_07-09-17
  done: false
  episode_len_mean: 324.18
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -32.41800000000019
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 2
  episodes_total: 4528
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.95809930027926
          cur_lr: 5.000000000000001e-05
          entropy: 1.6424059391021728
          entropy_coeff: 0.009999999999999998
          kl: 0.016019161250585827
          policy_loss: -0.10469717317157322
          total_loss: 0.9946694493293762
          vf_explained_var: -0.13621847331523895
          vf_loss: 1.10044274561935
    num_agent_steps_sampled: 1333000
    num_agent_steps_trained: 1333000
    num_steps_sampled: 1333000
    num_steps_trained: 1333000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1333,36167.8,1333000,-32.418,-22.3,-52.1,324.18




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1334000
  custom_metrics: {}
  date: 2021-10-29_07-09-57
  done: false
  episode_len_mean: 326.98
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -32.69800000000019
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4531
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.95809930027926
          cur_lr: 5.000000000000001e-05
          entropy: 1.1620220965809291
          entropy_coeff: 0.009999999999999998
          kl: 0.018477294086976355
          policy_loss: -0.04063568347030216
          total_loss: 0.7305145780245463
          vf_explained_var: 0.036230966448783875
          vf_loss: 0.7650673997071055
    num_agent_steps_sampled: 1334000
    num_agent_steps_trained: 1334000
    num_steps_sampled: 1334000
    num_steps_trained: 1334000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1334,36208,1334000,-32.698,-22.3,-52.1,326.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1335000
  custom_metrics: {}
  date: 2021-10-29_07-10-19
  done: false
  episode_len_mean: 328.37
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -32.837000000000195
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4534
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.95809930027926
          cur_lr: 5.000000000000001e-05
          entropy: 1.522200608253479
          entropy_coeff: 0.009999999999999998
          kl: 0.028555595650127094
          policy_loss: -0.01293677505519655
          total_loss: 0.7169927800695102
          vf_explained_var: 0.5855039954185486
          vf_loss: 0.7177924621436331
    num_agent_steps_sampled: 1335000
    num_agent_steps_trained: 1335000
    num_steps_sampled: 1335000
    num_steps_trained: 1335000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1335,36229.8,1335000,-32.837,-22.3,-52.1,328.37


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1336000
  custom_metrics: {}
  date: 2021-10-29_07-10-44
  done: false
  episode_len_mean: 328.35
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -32.83500000000019
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4537
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4371489504188903
          cur_lr: 5.000000000000001e-05
          entropy: 1.399117840660943
          entropy_coeff: 0.009999999999999998
          kl: 0.01500963674240538
          policy_loss: -0.15913221836090088
          total_loss: 1.1031180640061697
          vf_explained_var: 0.4351460337638855
          vf_loss: 1.2546703696250916
    num_agent_steps_sampled: 1336000
    num_agent_steps_trained: 1336000
    num_steps_sampled: 1336000
    num_steps_trained: 1336000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1336,36255.5,1336000,-32.835,-22.3,-52.1,328.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1337000
  custom_metrics: {}
  date: 2021-10-29_07-11-10
  done: false
  episode_len_mean: 329.09
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -32.9090000000002
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 4
  episodes_total: 4541
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4371489504188903
          cur_lr: 5.000000000000001e-05
          entropy: 1.2383082032203674
          entropy_coeff: 0.009999999999999998
          kl: 0.009053976567745063
          policy_loss: 0.033242068770858973
          total_loss: 0.6792818155553606
          vf_explained_var: 0.6953021287918091
          vf_loss: 0.6454109172026317
    num_agent_steps_sampled: 1337000
    num_agent_steps_trained: 1337000
    num_steps_sampled: 1337000
    num_steps_trained: 1337000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1337,36281.5,1337000,-32.909,-22.3,-52.1,329.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1338000
  custom_metrics: {}
  date: 2021-10-29_07-11-32
  done: false
  episode_len_mean: 330.76
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.07600000000021
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 2
  episodes_total: 4543
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4371489504188903
          cur_lr: 5.000000000000001e-05
          entropy: 1.5496499472194247
          entropy_coeff: 0.009999999999999998
          kl: 0.010021291522572223
          policy_loss: -0.11802901244825786
          total_loss: 0.8363063133425183
          vf_explained_var: 0.5258846879005432
          vf_loss: 0.9554297380977207
    num_agent_steps_sampled: 1338000
    num_agent_steps_trained: 1338000
    num_steps_sampled: 1338000
    num_steps_trained: 1338000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1338,36302.6,1338000,-33.076,-22.3,-52.1,330.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1339000
  custom_metrics: {}
  date: 2021-10-29_07-11-55
  done: false
  episode_len_mean: 333.03
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.3030000000002
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4546
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4371489504188903
          cur_lr: 5.000000000000001e-05
          entropy: 1.6521033975813124
          entropy_coeff: 0.009999999999999998
          kl: 0.007023559026110683
          policy_loss: 0.01217615587843789
          total_loss: 1.0420513699452083
          vf_explained_var: 0.4770221710205078
          vf_loss: 1.0363023553457524
    num_agent_steps_sampled: 1339000
    num_agent_steps_trained: 1339000
    num_steps_sampled: 1339000
    num_steps_trained: 1339000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1339,36326,1339000,-33.303,-22.3,-52.1,333.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1340000
  custom_metrics: {}
  date: 2021-10-29_07-12-17
  done: false
  episode_len_mean: 335.28
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.52800000000021
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4549
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4371489504188903
          cur_lr: 5.000000000000001e-05
          entropy: 1.6489573571417067
          entropy_coeff: 0.009999999999999998
          kl: 0.00967585193495187
          policy_loss: 0.004473085535897149
          total_loss: 0.8873983469274309
          vf_explained_var: 0.5418945550918579
          vf_loss: 0.8855092024223672
    num_agent_steps_sampled: 1340000
    num_agent_steps_trained: 1340000
    num_steps_sampled: 1340000
    num_steps_trained: 1340000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1340,36348,1340000,-33.528,-22.3,-52.1,335.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1341000
  custom_metrics: {}
  date: 2021-10-29_07-12-44
  done: false
  episode_len_mean: 335.27
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.527000000000214
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4552
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4371489504188903
          cur_lr: 5.000000000000001e-05
          entropy: 1.1753784656524657
          entropy_coeff: 0.009999999999999998
          kl: 0.01241252990717313
          policy_loss: -0.08127558396922217
          total_loss: 0.7738762868775262
          vf_explained_var: 0.6089831590652466
          vf_loss: 0.8490670051839616
    num_agent_steps_sampled: 1341000
    num_agent_steps_trained: 1341000
    num_steps_sampled: 1341000
    num_steps_trained: 1341000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1341,36375.2,1341000,-33.527,-22.3,-52.1,335.27


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1342000
  custom_metrics: {}
  date: 2021-10-29_07-13-11
  done: false
  episode_len_mean: 336.71
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.67100000000021
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 4
  episodes_total: 4556
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4371489504188903
          cur_lr: 5.000000000000001e-05
          entropy: 1.4654051701227824
          entropy_coeff: 0.009999999999999998
          kl: 0.008185587685461668
          policy_loss: -0.003508734123574363
          total_loss: 1.359376605351766
          vf_explained_var: 0.2704053819179535
          vf_loss: 1.3657754871580337
    num_agent_steps_sampled: 1342000
    num_agent_steps_trained: 1342000
    num_steps_sampled: 1342000
    num_steps_trained: 1342000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1342,36401.7,1342000,-33.671,-22.3,-52.1,336.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1343000
  custom_metrics: {}
  date: 2021-10-29_07-13-40
  done: false
  episode_len_mean: 336.07
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.60700000000021
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4559
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4371489504188903
          cur_lr: 5.000000000000001e-05
          entropy: 1.1522734098964267
          entropy_coeff: 0.009999999999999998
          kl: 0.012026100348786647
          policy_loss: -0.016752975641025437
          total_loss: 0.6161793493562274
          vf_explained_var: 0.6473245620727539
          vf_loss: 0.6271717614597745
    num_agent_steps_sampled: 1343000
    num_agent_steps_trained: 1343000
    num_steps_sampled: 1343000
    num_steps_trained: 1343000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1343,36431.1,1343000,-33.607,-22.3,-52.1,336.07




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1344000
  custom_metrics: {}
  date: 2021-10-29_07-14-22
  done: false
  episode_len_mean: 337.5
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.75000000000021
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4562
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4371489504188903
          cur_lr: 5.000000000000001e-05
          entropy: 1.6234647631645203
          entropy_coeff: 0.009999999999999998
          kl: 0.0146014149532391
          policy_loss: -0.0090933115945922
          total_loss: 0.8699395683076646
          vf_explained_var: 0.2784399390220642
          vf_loss: 0.874283122850789
    num_agent_steps_sampled: 1344000
    num_agent_steps_trained: 1344000
    num_steps_sampled: 1344000
    num_steps_trained: 1344000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1344,36473.3,1344000,-33.75,-22.3,-52.1,337.5


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1345000
  custom_metrics: {}
  date: 2021-10-29_07-14-48
  done: false
  episode_len_mean: 337.42
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.7420000000002
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 4
  episodes_total: 4566
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4371489504188903
          cur_lr: 5.000000000000001e-05
          entropy: 1.0755374477969275
          entropy_coeff: 0.009999999999999998
          kl: 0.006114488807073413
          policy_loss: 0.02703611726562182
          total_loss: 0.9487802909480201
          vf_explained_var: 0.5978913307189941
          vf_loss: 0.9237120976050694
    num_agent_steps_sampled: 1345000
    num_agent_steps_trained: 1345000
    num_steps_sampled: 1345000
    num_steps_trained: 1345000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1345,36499.4,1345000,-33.742,-22.3,-52.1,337.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1346000
  custom_metrics: {}
  date: 2021-10-29_07-15-11
  done: false
  episode_len_mean: 338.13
  episode_media: {}
  episode_reward_max: -22.300000000000047
  episode_reward_mean: -33.81300000000021
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4569
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4371489504188903
          cur_lr: 5.000000000000001e-05
          entropy: 1.4647273964352079
          entropy_coeff: 0.009999999999999998
          kl: 0.00893897049124735
          policy_loss: 0.06178681179881096
          total_loss: 0.9320028341478772
          vf_explained_var: -0.05607394501566887
          vf_loss: 0.8720166626903746
    num_agent_steps_sampled: 1346000
    num_agent_steps_trained: 1346000
    num_steps_sampled: 1346000
    num_steps_trained: 1346000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1346,36522.3,1346000,-33.813,-22.3,-52.1,338.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1347000
  custom_metrics: {}
  date: 2021-10-29_07-15-41
  done: false
  episode_len_mean: 336.39
  episode_media: {}
  episode_reward_max: -24.70000000000008
  episode_reward_mean: -33.63900000000021
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4572
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4371489504188903
          cur_lr: 5.000000000000001e-05
          entropy: 1.021969015730752
          entropy_coeff: 0.009999999999999998
          kl: 0.014674261593586892
          policy_loss: -0.06350849287377464
          total_loss: 0.7492093129290475
          vf_explained_var: 0.6794400811195374
          vf_loss: 0.801848390367296
    num_agent_steps_sampled: 1347000
    num_agent_steps_trained: 1347000
    num_steps_sampled: 1347000
    num_steps_trained: 1347000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1347,36551.9,1347000,-33.639,-24.7,-52.1,336.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1348000
  custom_metrics: {}
  date: 2021-10-29_07-16-12
  done: false
  episode_len_mean: 334.67
  episode_media: {}
  episode_reward_max: -24.70000000000008
  episode_reward_mean: -33.467000000000205
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 4
  episodes_total: 4576
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4371489504188903
          cur_lr: 5.000000000000001e-05
          entropy: 1.1158359885215758
          entropy_coeff: 0.009999999999999998
          kl: 0.004530598050027695
          policy_loss: -0.03494661135805978
          total_loss: 0.6691602637370427
          vf_explained_var: 0.5563980340957642
          vf_loss: 0.7087540921237734
    num_agent_steps_sampled: 1348000
    num_agent_steps_trained: 1348000
    num_steps_sampled: 1348000
    num_steps_trained: 1348000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1348,36582.4,1348000,-33.467,-24.7,-52.1,334.67


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1349000
  custom_metrics: {}
  date: 2021-10-29_07-16-39
  done: false
  episode_len_mean: 332.06
  episode_media: {}
  episode_reward_max: -24.70000000000008
  episode_reward_mean: -33.20600000000021
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 4
  episodes_total: 4580
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7185744752094452
          cur_lr: 5.000000000000001e-05
          entropy: 1.0807117177380456
          entropy_coeff: 0.009999999999999998
          kl: 0.01203664786033032
          policy_loss: 0.06948866049448649
          total_loss: 0.6297324902481503
          vf_explained_var: 0.7403013706207275
          vf_loss: 0.5624017190602091
    num_agent_steps_sampled: 1349000
    num_agent_steps_trained: 1349000
    num_steps_sampled: 1349000
    num_steps_trained: 1349000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1349,36610.3,1349000,-33.206,-24.7,-52.1,332.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1350000
  custom_metrics: {}
  date: 2021-10-29_07-17-08
  done: false
  episode_len_mean: 332.02
  episode_media: {}
  episode_reward_max: -24.70000000000008
  episode_reward_mean: -33.202000000000204
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4583
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7185744752094452
          cur_lr: 5.000000000000001e-05
          entropy: 1.2910613112979465
          entropy_coeff: 0.009999999999999998
          kl: 0.009380143377333165
          policy_loss: 0.004296799148950312
          total_loss: 0.8912629200352563
          vf_explained_var: -0.12195413559675217
          vf_loss: 0.8931364052825503
    num_agent_steps_sampled: 1350000
    num_agent_steps_trained: 1350000
    num_steps_sampled: 1350000
    num_steps_trained: 1350000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1350,36639.2,1350000,-33.202,-24.7,-52.1,332.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1351000
  custom_metrics: {}
  date: 2021-10-29_07-17-34
  done: false
  episode_len_mean: 329.74
  episode_media: {}
  episode_reward_max: -24.300000000000075
  episode_reward_mean: -32.9740000000002
  episode_reward_min: -48.50000000000042
  episodes_this_iter: 3
  episodes_total: 4586
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7185744752094452
          cur_lr: 5.000000000000001e-05
          entropy: 1.2286013397905562
          entropy_coeff: 0.009999999999999998
          kl: 0.010835414852912903
          policy_loss: 0.06317672265900506
          total_loss: 0.8847570293479495
          vf_explained_var: 0.16214270889759064
          vf_loss: 0.8260802738342641
    num_agent_steps_sampled: 1351000
    num_agent_steps_trained: 1351000
    num_steps_sampled: 1351000
    num_steps_trained: 1351000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1351,36664.5,1351000,-32.974,-24.3,-48.5,329.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1352000
  custom_metrics: {}
  date: 2021-10-29_07-18-00
  done: false
  episode_len_mean: 329.3
  episode_media: {}
  episode_reward_max: -24.300000000000075
  episode_reward_mean: -32.9300000000002
  episode_reward_min: -48.50000000000042
  episodes_this_iter: 3
  episodes_total: 4589
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7185744752094452
          cur_lr: 5.000000000000001e-05
          entropy: 1.2334833310710058
          entropy_coeff: 0.009999999999999998
          kl: 0.007231301154140645
          policy_loss: -0.03586188811394903
          total_loss: 0.8371624138620165
          vf_explained_var: 0.24525070190429688
          vf_loss: 0.8801628983683056
    num_agent_steps_sampled: 1352000
    num_agent_steps_trained: 1352000
    num_steps_sampled: 1352000
    num_steps_trained: 1352000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1352,36691.1,1352000,-32.93,-24.3,-48.5,329.3




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1353000
  custom_metrics: {}
  date: 2021-10-29_07-18-46
  done: false
  episode_len_mean: 326.46
  episode_media: {}
  episode_reward_max: -24.300000000000075
  episode_reward_mean: -32.64600000000019
  episode_reward_min: -48.00000000000041
  episodes_this_iter: 4
  episodes_total: 4593
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7185744752094452
          cur_lr: 5.000000000000001e-05
          entropy: 1.0484309090508355
          entropy_coeff: 0.009999999999999998
          kl: 0.01146640737113829
          policy_loss: 0.018420456846555074
          total_loss: 0.7280600062674947
          vf_explained_var: 0.7295798659324646
          vf_loss: 0.7118843843539556
    num_agent_steps_sampled: 1353000
    num_agent_steps_trained: 1353000
    num_steps_sampled: 1353000
    num_steps_trained: 1353000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1353,36737.1,1353000,-32.646,-24.3,-48,326.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1354000
  custom_metrics: {}
  date: 2021-10-29_07-19-18
  done: false
  episode_len_mean: 320.11
  episode_media: {}
  episode_reward_max: -24.300000000000075
  episode_reward_mean: -32.01100000000019
  episode_reward_min: -48.00000000000041
  episodes_this_iter: 4
  episodes_total: 4597
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7185744752094452
          cur_lr: 5.000000000000001e-05
          entropy: 0.8658935248851776
          entropy_coeff: 0.009999999999999998
          kl: 0.005251253913524185
          policy_loss: 0.002711943040291468
          total_loss: 0.6346415026320351
          vf_explained_var: 0.7490693926811218
          vf_loss: 0.6368150770664215
    num_agent_steps_sampled: 1354000
    num_agent_steps_trained: 1354000
    num_steps_sampled: 1354000
    num_steps_trained: 1354000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1354,36768.7,1354000,-32.011,-24.3,-48,320.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1355000
  custom_metrics: {}
  date: 2021-10-29_07-19-44
  done: false
  episode_len_mean: 318.74
  episode_media: {}
  episode_reward_max: -24.300000000000075
  episode_reward_mean: -31.874000000000184
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4600
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7185744752094452
          cur_lr: 5.000000000000001e-05
          entropy: 1.2262033157878451
          entropy_coeff: 0.009999999999999998
          kl: 0.009630076593029141
          policy_loss: -0.06362638738420275
          total_loss: 0.7972823889719115
          vf_explained_var: -0.029738368466496468
          vf_loss: 0.8662508885065715
    num_agent_steps_sampled: 1355000
    num_agent_steps_trained: 1355000
    num_steps_sampled: 1355000
    num_steps_trained: 1355

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1355,36794.7,1355000,-31.874,-24.3,-52.4,318.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1356000
  custom_metrics: {}
  date: 2021-10-29_07-20-13
  done: false
  episode_len_mean: 318.28
  episode_media: {}
  episode_reward_max: -24.300000000000075
  episode_reward_mean: -31.82800000000018
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4603
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7185744752094452
          cur_lr: 5.000000000000001e-05
          entropy: 1.195376690891054
          entropy_coeff: 0.009999999999999998
          kl: 0.00480855329883904
          policy_loss: -0.021751889255311754
          total_loss: 0.9423401547802819
          vf_explained_var: 0.09683714807033539
          vf_loss: 0.9725905222197374
    num_agent_steps_sampled: 1356000
    num_agent_steps_trained: 1356000
    num_steps_sampled: 1356000
    num_steps_trained: 1356000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1356,36823.3,1356000,-31.828,-24.3,-52.4,318.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1357000
  custom_metrics: {}
  date: 2021-10-29_07-20-41
  done: false
  episode_len_mean: 317.07
  episode_media: {}
  episode_reward_max: -24.300000000000075
  episode_reward_mean: -31.70700000000017
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 4
  episodes_total: 4607
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3592872376047226
          cur_lr: 5.000000000000001e-05
          entropy: 1.2161082572407194
          entropy_coeff: 0.009999999999999998
          kl: 0.009255786653462413
          policy_loss: -0.06384416882776552
          total_loss: 0.8454798665311601
          vf_explained_var: 0.6441383957862854
          vf_loss: 0.9181596408287684
    num_agent_steps_sampled: 1357000
    num_agent_steps_trained: 1357000
    num_steps_sampled: 1357000
    num_steps_trained: 1357000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1357,36851.4,1357000,-31.707,-24.3,-52.4,317.07


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1358000
  custom_metrics: {}
  date: 2021-10-29_07-21-09
  done: false
  episode_len_mean: 315.02
  episode_media: {}
  episode_reward_max: -24.300000000000075
  episode_reward_mean: -31.50200000000018
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4610
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3592872376047226
          cur_lr: 5.000000000000001e-05
          entropy: 1.0435390326711866
          entropy_coeff: 0.009999999999999998
          kl: 0.005571011383668968
          policy_loss: 0.11038917162352138
          total_loss: 0.4943324794371923
          vf_explained_var: 0.8582792282104492
          vf_loss: 0.39237710990839536
    num_agent_steps_sampled: 1358000
    num_agent_steps_trained: 1358000
    num_steps_sampled: 1358000
    num_steps_trained: 1358000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1358,36879.9,1358000,-31.502,-24.3,-52.4,315.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1359000
  custom_metrics: {}
  date: 2021-10-29_07-21-37
  done: false
  episode_len_mean: 312.33
  episode_media: {}
  episode_reward_max: -24.300000000000075
  episode_reward_mean: -31.233000000000178
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4613
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3592872376047226
          cur_lr: 5.000000000000001e-05
          entropy: 1.069053171740638
          entropy_coeff: 0.009999999999999998
          kl: 0.0060669938157352655
          policy_loss: -0.05424457937479019
          total_loss: 0.6294305208656523
          vf_explained_var: 0.7858306765556335
          vf_loss: 0.6921858340501785
    num_agent_steps_sampled: 1359000
    num_agent_steps_trained: 1359000
    num_steps_sampled: 1359000
    num_steps_trained: 1359000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1359,36907.2,1359000,-31.233,-24.3,-52.4,312.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1360000
  custom_metrics: {}
  date: 2021-10-29_07-22-04
  done: false
  episode_len_mean: 312.56
  episode_media: {}
  episode_reward_max: -24.300000000000075
  episode_reward_mean: -31.25600000000017
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4616
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3592872376047226
          cur_lr: 5.000000000000001e-05
          entropy: 1.306516135401196
          entropy_coeff: 0.009999999999999998
          kl: 0.011515215557846483
          policy_loss: -0.07239007958107524
          total_loss: 1.142699948946635
          vf_explained_var: 0.2280224710702896
          vf_loss: 1.2240179123149977
    num_agent_steps_sampled: 1360000
    num_agent_steps_trained: 1360000
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1360,36934.3,1360000,-31.256,-24.3,-52.4,312.56




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1361000
  custom_metrics: {}
  date: 2021-10-29_07-22-47
  done: false
  episode_len_mean: 311.78
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -31.17800000000017
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 4
  episodes_total: 4620
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3592872376047226
          cur_lr: 5.000000000000001e-05
          entropy: 1.3088168329662746
          entropy_coeff: 0.009999999999999998
          kl: 0.016740084964039583
          policy_loss: -0.04549047864145703
          total_loss: 0.9594199142522282
          vf_explained_var: 0.5564711093902588
          vf_loss: 1.011984062525961
    num_agent_steps_sampled: 1361000
    num_agent_steps_trained: 1361000
    num_steps_sampled: 1361000
    num_steps_trained: 1361000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1361,36977.8,1361000,-31.178,-20.1,-52.4,311.78


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1362000
  custom_metrics: {}
  date: 2021-10-29_07-23-16
  done: false
  episode_len_mean: 308.31
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.83100000000016
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4623
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3592872376047226
          cur_lr: 5.000000000000001e-05
          entropy: 1.1811972459157307
          entropy_coeff: 0.009999999999999998
          kl: 0.022836739090855453
          policy_loss: 0.0066049915221002365
          total_loss: 0.782341280579567
          vf_explained_var: 0.2014576643705368
          vf_loss: 0.7793433104952177
    num_agent_steps_sampled: 1362000
    num_agent_steps_trained: 1362000
    num_steps_sampled: 1362000
    num_steps_trained: 1362000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1362,37006,1362000,-30.831,-20.1,-52.4,308.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1363000
  custom_metrics: {}
  date: 2021-10-29_07-23-44
  done: false
  episode_len_mean: 306.26
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.62600000000016
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 4
  episodes_total: 4627
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5389308564070837
          cur_lr: 5.000000000000001e-05
          entropy: 1.1493802626927694
          entropy_coeff: 0.009999999999999998
          kl: 0.008047012892789793
          policy_loss: -0.13999027725723054
          total_loss: 0.7021944065888722
          vf_explained_var: 0.4043181836605072
          vf_loss: 0.8493417080905702
    num_agent_steps_sampled: 1363000
    num_agent_steps_trained: 1363000
    num_steps_sampled: 1363000
    num_steps_trained: 1363000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1363,37034.3,1363000,-30.626,-20.1,-52.4,306.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1364000
  custom_metrics: {}
  date: 2021-10-29_07-24-14
  done: false
  episode_len_mean: 304.17
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.417000000000165
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4630
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5389308564070837
          cur_lr: 5.000000000000001e-05
          entropy: 1.1836585309770373
          entropy_coeff: 0.009999999999999998
          kl: 0.011827484631873927
          policy_loss: -0.05115960459742281
          total_loss: 0.4920705868138207
          vf_explained_var: 0.7188281416893005
          vf_loss: 0.5486925800641378
    num_agent_steps_sampled: 1364000
    num_agent_steps_trained: 1364000
    num_steps_sampled: 1364000
    num_steps_trained: 1364000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1364,37064.4,1364000,-30.417,-20.1,-52.4,304.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1365000
  custom_metrics: {}
  date: 2021-10-29_07-24-36
  done: false
  episode_len_mean: 303.8
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.38000000000017
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4633
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5389308564070837
          cur_lr: 5.000000000000001e-05
          entropy: 1.4623853113916185
          entropy_coeff: 0.009999999999999998
          kl: 0.02982261116129005
          policy_loss: -0.028506627016597324
          total_loss: 0.7982676052384906
          vf_explained_var: 0.6029015779495239
          vf_loss: 0.8253257698482938
    num_agent_steps_sampled: 1365000
    num_agent_steps_trained: 1365000
    num_steps_sampled: 1365000
    num_steps_trained: 1365000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1365,37086.4,1365000,-30.38,-20.1,-52.4,303.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1366000
  custom_metrics: {}
  date: 2021-10-29_07-25-00
  done: false
  episode_len_mean: 304.57
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.45700000000016
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4636
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.3748857617378234
          entropy_coeff: 0.009999999999999998
          kl: 0.008549085149944504
          policy_loss: 0.02347566212217013
          total_loss: 1.25504230260849
          vf_explained_var: 0.290179044008255
          vf_loss: 1.2384044299523036
    num_agent_steps_sampled: 1366000
    num_agent_steps_trained: 1366000
    num_steps_sampled: 1366000
    num_steps_trained: 1366000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1366,37110.1,1366000,-30.457,-20.1,-52.4,304.57


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1367000
  custom_metrics: {}
  date: 2021-10-29_07-25-31
  done: false
  episode_len_mean: 303.16
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.31600000000016
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 4
  episodes_total: 4640
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.226103346877628
          entropy_coeff: 0.009999999999999998
          kl: 0.010778728304553335
          policy_loss: -0.013906005894144376
          total_loss: 1.1625836273034413
          vf_explained_var: 0.25855323672294617
          vf_loss: 1.1800371713108486
    num_agent_steps_sampled: 1367000
    num_agent_steps_trained: 1367000
    num_steps_sampled: 1367000
    num_steps_trained: 1367000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1367,37141,1367000,-30.316,-20.1,-52.4,303.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1368000
  custom_metrics: {}
  date: 2021-10-29_07-25-57
  done: false
  episode_len_mean: 302.17
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.21700000000016
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4643
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.3056822233729892
          entropy_coeff: 0.009999999999999998
          kl: 0.008927479904443203
          policy_loss: 0.04589503051506148
          total_loss: 0.7063922532730632
          vf_explained_var: 0.5872704386711121
          vf_loss: 0.6663371038105753
    num_agent_steps_sampled: 1368000
    num_agent_steps_trained: 1368000
    num_steps_sampled: 1368000
    num_steps_trained: 1368000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1368,37167.4,1368000,-30.217,-20.1,-52.4,302.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1369000
  custom_metrics: {}
  date: 2021-10-29_07-26-22
  done: false
  episode_len_mean: 300.9
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.09000000000016
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4646
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.4633290939860875
          entropy_coeff: 0.009999999999999998
          kl: 0.010758535328639043
          policy_loss: 0.08493564791149563
          total_loss: 0.8904884278774261
          vf_explained_var: 0.5209500789642334
          vf_loss: 0.811488903231091
    num_agent_steps_sampled: 1369000
    num_agent_steps_trained: 1369000
    num_steps_sampled: 1369000
    num_steps_trained: 1369000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1369,37192.8,1369000,-30.09,-20.1,-52.4,300.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1370000
  custom_metrics: {}
  date: 2021-10-29_07-26-52
  done: false
  episode_len_mean: 299.39
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.939000000000156
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4649
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.3565671523412068
          entropy_coeff: 0.009999999999999998
          kl: 0.007768011021129142
          policy_loss: -0.03830568492412567
          total_loss: 0.992319909731547
          vf_explained_var: 0.37346068024635315
          vf_loss: 1.0379116425911585
    num_agent_steps_sampled: 1370000
    num_agent_steps_trained: 1370000
    num_steps_sampled: 1370000
    num_steps_trained: 1370000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1370,37222.6,1370000,-29.939,-20.1,-52.4,299.39




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1371000
  custom_metrics: {}
  date: 2021-10-29_07-27-41
  done: false
  episode_len_mean: 297.54
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.75400000000015
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 4
  episodes_total: 4653
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.1594386948479547
          entropy_coeff: 0.009999999999999998
          kl: 0.00826160196490575
          policy_loss: -0.02131624644001325
          total_loss: 1.382975697517395
          vf_explained_var: 0.17878879606723785
          vf_loss: 1.4092076818148296
    num_agent_steps_sampled: 1371000
    num_agent_steps_trained: 1371000
    num_steps_sampled: 1371000
    num_steps_trained: 1371000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1371,37271.3,1371000,-29.754,-20.1,-52.4,297.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1372000
  custom_metrics: {}
  date: 2021-10-29_07-28-05
  done: false
  episode_len_mean: 297.93
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.793000000000152
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4656
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.391440388891432
          entropy_coeff: 0.009999999999999998
          kl: 0.014462319962327799
          policy_loss: -0.13523925902942815
          total_loss: 0.9261784927712546
          vf_explained_var: 0.46106016635894775
          vf_loss: 1.0636408746242523
    num_agent_steps_sampled: 1372000
    num_agent_steps_trained: 1372000
    num_steps_sampled: 1372000
    num_steps_trained: 1372000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1372,37294.8,1372000,-29.793,-20.1,-52.4,297.93


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1373000
  custom_metrics: {}
  date: 2021-10-29_07-28-29
  done: false
  episode_len_mean: 300.5
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.05000000000016
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4659
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.4496361136436462
          entropy_coeff: 0.009999999999999998
          kl: 0.007829248966455853
          policy_loss: 0.03600667847527398
          total_loss: 1.242098789744907
          vf_explained_var: 0.4797120690345764
          vf_loss: 1.2142593410280016
    num_agent_steps_sampled: 1373000
    num_agent_steps_trained: 1373000
    num_steps_sampled: 1373000
    num_steps_trained: 1373000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1373,37319.1,1373000,-30.05,-20.1,-52.4,300.5


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1374000
  custom_metrics: {}
  date: 2021-10-29_07-28-55
  done: false
  episode_len_mean: 299.83
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.98300000000016
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4662
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.5650394082069397
          entropy_coeff: 0.009999999999999998
          kl: 0.008760834753692295
          policy_loss: -0.10119769606325361
          total_loss: 1.2821017305056255
          vf_explained_var: 0.24147631227970123
          vf_loss: 1.3918676018714904
    num_agent_steps_sampled: 1374000
    num_agent_steps_trained: 1374000
    num_steps_sampled: 1374000
    num_steps_trained: 1374000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1374,37345.6,1374000,-29.983,-20.1,-52.4,299.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1375000
  custom_metrics: {}
  date: 2021-10-29_07-29-21
  done: false
  episode_len_mean: 301.51
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.15100000000016
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4665
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.3063200612862904
          entropy_coeff: 0.009999999999999998
          kl: 0.01554981664146374
          policy_loss: -0.10401429997550117
          total_loss: 0.7734138823217815
          vf_explained_var: 0.5366863012313843
          vf_loss: 0.8779209620422788
    num_agent_steps_sampled: 1375000
    num_agent_steps_trained: 1375000
    num_steps_sampled: 1375000
    num_steps_trained: 1375000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1375,37371.5,1375000,-30.151,-20.1,-52.4,301.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1376000
  custom_metrics: {}
  date: 2021-10-29_07-29-51
  done: false
  episode_len_mean: 299.09
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.909000000000155
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 4
  episodes_total: 4669
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 0.9671537796656291
          entropy_coeff: 0.009999999999999998
          kl: 0.0055352352317840255
          policy_loss: 0.04519041594531801
          total_loss: 0.8449225174056159
          vf_explained_var: 0.672562301158905
          vf_loss: 0.8049289749728309
    num_agent_steps_sampled: 1376000
    num_agent_steps_trained: 1376000
    num_steps_sampled: 1376000
    num_steps_trained: 1376000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1376,37401.2,1376000,-29.909,-20.1,-52.4,299.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1377000
  custom_metrics: {}
  date: 2021-10-29_07-30-13
  done: false
  episode_len_mean: 300.71
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.071000000000158
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4672
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.2128016816245184
          entropy_coeff: 0.009999999999999998
          kl: 0.01146974439845994
          policy_loss: -0.005773970815870497
          total_loss: 0.5405988113747703
          vf_explained_var: 0.78856360912323
          vf_loss: 0.5492287037066288
    num_agent_steps_sampled: 1377000
    num_agent_steps_trained: 1377000
    num_steps_sampled: 1377000
    num_steps_trained: 1377000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1377,37423.4,1377000,-30.071,-20.1,-52.4,300.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1378000
  custom_metrics: {}
  date: 2021-10-29_07-30-39
  done: false
  episode_len_mean: 303.31
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.33100000000016
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4675
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.3042641487386493
          entropy_coeff: 0.009999999999999998
          kl: 0.019738377250751504
          policy_loss: 0.08903130061096615
          total_loss: 0.6486731830570432
          vf_explained_var: 0.5569515824317932
          vf_loss: 0.5567280924879014
    num_agent_steps_sampled: 1378000
    num_agent_steps_trained: 1378000
    num_steps_sampled: 1378000
    num_steps_trained: 1378000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1378,37449,1378000,-30.331,-20.1,-52.4,303.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1379000
  custom_metrics: {}
  date: 2021-10-29_07-31-10
  done: false
  episode_len_mean: 303.49
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.34900000000016
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 4
  episodes_total: 4679
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 0.9909235550297631
          entropy_coeff: 0.009999999999999998
          kl: 0.009813922428554712
          policy_loss: 0.05219051001800431
          total_loss: 1.2665939307875103
          vf_explained_var: 0.5127524733543396
          vf_loss: 1.2163791000843047
    num_agent_steps_sampled: 1379000
    num_agent_steps_trained: 1379000
    num_steps_sampled: 1379000
    num_steps_trained: 1379000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1379,37480.3,1379000,-30.349,-20.1,-52.4,303.49




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1380000
  custom_metrics: {}
  date: 2021-10-29_07-31-54
  done: false
  episode_len_mean: 302.5
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.250000000000163
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4682
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.378371657265557
          entropy_coeff: 0.009999999999999998
          kl: 0.006805976005739467
          policy_loss: 0.016009077181418738
          total_loss: 0.9345285971959432
          vf_explained_var: 0.36849212646484375
          vf_loss: 0.9268013021184338
    num_agent_steps_sampled: 1380000
    num_agent_steps_trained: 1380000
    num_steps_sampled: 1380000
    num_steps_trained: 1380000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1380,37523.7,1380000,-30.25,-20.1,-52.4,302.5


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1381000
  custom_metrics: {}
  date: 2021-10-29_07-32-19
  done: false
  episode_len_mean: 304.72
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.472000000000165
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 3
  episodes_total: 4685
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.185614154736201
          entropy_coeff: 0.009999999999999998
          kl: 0.00777817060033183
          policy_loss: 0.027702869143750933
          total_loss: 0.8208637989229626
          vf_explained_var: 0.6032856702804565
          vf_loss: 0.7987292318708367
    num_agent_steps_sampled: 1381000
    num_agent_steps_trained: 1381000
    num_steps_sampled: 1381000
    num_steps_trained: 1381000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1381,37549.6,1381000,-30.472,-20.1,-52.4,304.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1382000
  custom_metrics: {}
  date: 2021-10-29_07-32-39
  done: false
  episode_len_mean: 304.39
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.439000000000156
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 2
  episodes_total: 4687
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.4213662637604607
          entropy_coeff: 0.009999999999999998
          kl: 0.006665214894844951
          policy_loss: -0.07523444924089644
          total_loss: 0.8426087737083435
          vf_explained_var: 0.45595961809158325
          vf_loss: 0.9266687525643242
    num_agent_steps_sampled: 1382000
    num_agent_steps_trained: 1382000
    num_steps_sampled: 1382000
    num_steps_trained: 138200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1382,37568.9,1382000,-30.439,-20.1,-52.4,304.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1383000
  custom_metrics: {}
  date: 2021-10-29_07-33-05
  done: false
  episode_len_mean: 306.29
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.629000000000175
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 4
  episodes_total: 4691
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8083962846106255
          cur_lr: 5.000000000000001e-05
          entropy: 1.1551315059264502
          entropy_coeff: 0.009999999999999998
          kl: 0.09316687475385853
          policy_loss: 0.047499745421939424
          total_loss: 2.2283999856975343
          vf_explained_var: -0.1272866427898407
          vf_loss: 2.117135823186901
    num_agent_steps_sampled: 1383000
    num_agent_steps_trained: 1383000
    num_steps_sampled: 1383000
    num_steps_trained: 1383000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1383,37595.3,1383000,-30.629,-20.1,-52.4,306.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1384000
  custom_metrics: {}
  date: 2021-10-29_07-33-28
  done: false
  episode_len_mean: 308.7
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.87000000000017
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 2
  episodes_total: 4693
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2125944269159386
          cur_lr: 5.000000000000001e-05
          entropy: 1.569892050160302
          entropy_coeff: 0.009999999999999998
          kl: 0.005503266466433527
          policy_loss: -0.06738905393415028
          total_loss: 1.0958631151252323
          vf_explained_var: 0.3828882873058319
          vf_loss: 1.1722778485880958
    num_agent_steps_sampled: 1384000
    num_agent_steps_trained: 1384000
    num_steps_sampled: 1384000
    num_steps_trained: 1384000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1384,37618.2,1384000,-30.87,-20.1,-52.4,308.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1385000
  custom_metrics: {}
  date: 2021-10-29_07-33-59
  done: false
  episode_len_mean: 309.66
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.966000000000175
  episode_reward_min: -52.400000000000475
  episodes_this_iter: 4
  episodes_total: 4697
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2125944269159386
          cur_lr: 5.000000000000001e-05
          entropy: 1.1514051304923163
          entropy_coeff: 0.009999999999999998
          kl: 0.007415891659747271
          policy_loss: 0.015813732064432567
          total_loss: 1.1951053089565702
          vf_explained_var: 0.5415937304496765
          vf_loss: 1.1818131413724688
    num_agent_steps_sampled: 1385000
    num_agent_steps_trained: 1385000
    num_steps_sampled: 1385000
    num_steps_trained: 1385000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1385,37648.8,1385000,-30.966,-20.1,-52.4,309.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1386000
  custom_metrics: {}
  date: 2021-10-29_07-34-29
  done: false
  episode_len_mean: 307.77
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.777000000000168
  episode_reward_min: -48.40000000000042
  episodes_this_iter: 4
  episodes_total: 4701
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2125944269159386
          cur_lr: 5.000000000000001e-05
          entropy: 0.9809739781750573
          entropy_coeff: 0.009999999999999998
          kl: 0.005184764500753324
          policy_loss: -0.020087326566378277
          total_loss: 1.7086009356710645
          vf_explained_var: 0.2805810272693634
          vf_loss: 1.7322109699249268
    num_agent_steps_sampled: 1386000
    num_agent_steps_trained: 1386000
    num_steps_sampled: 1386000
    num_steps_trained: 1386000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1386,37678.6,1386000,-30.777,-20.1,-48.4,307.77


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1387000
  custom_metrics: {}
  date: 2021-10-29_07-34-58
  done: false
  episode_len_mean: 307.62
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.76200000000017
  episode_reward_min: -48.40000000000042
  episodes_this_iter: 3
  episodes_total: 4704
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2125944269159386
          cur_lr: 5.000000000000001e-05
          entropy: 1.1182690693272486
          entropy_coeff: 0.009999999999999998
          kl: 0.002665909823966928
          policy_loss: -0.008030161758263906
          total_loss: 1.38759912053744
          vf_explained_var: 0.26396656036376953
          vf_loss: 1.4035792870654
    num_agent_steps_sampled: 1387000
    num_agent_steps_trained: 1387000
    num_steps_sampled: 1387000
    num_steps_trained: 1387000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1387,37707.5,1387000,-30.762,-20.1,-48.4,307.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1388000
  custom_metrics: {}
  date: 2021-10-29_07-35-29
  done: false
  episode_len_mean: 305.17
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.517000000000163
  episode_reward_min: -48.40000000000042
  episodes_this_iter: 4
  episodes_total: 4708
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 0.7071040550867717
          entropy_coeff: 0.009999999999999998
          kl: 0.011122411828214891
          policy_loss: -0.058080520646439655
          total_loss: 1.9156175421343908
          vf_explained_var: 0.30191874504089355
          vf_loss: 1.9740255882342657
    num_agent_steps_sampled: 1388000
    num_agent_steps_trained: 1388000
    num_steps_sampled: 1388000
    num_steps_trained: 138800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1388,37738.7,1388000,-30.517,-20.1,-48.4,305.17




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1389000
  custom_metrics: {}
  date: 2021-10-29_07-36-12
  done: false
  episode_len_mean: 306.64
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.66400000000017
  episode_reward_min: -48.40000000000042
  episodes_this_iter: 3
  episodes_total: 4711
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.5444175309605068
          entropy_coeff: 0.009999999999999998
          kl: 0.015382333842418482
          policy_loss: -0.01566195123725467
          total_loss: 0.7935518205165863
          vf_explained_var: 0.6444276571273804
          vf_loss: 0.8153316841357284
    num_agent_steps_sampled: 1389000
    num_agent_steps_trained: 1389000
    num_steps_sampled: 1389000
    num_steps_trained: 1389000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1389,37782.4,1389000,-30.664,-20.1,-48.4,306.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1390000
  custom_metrics: {}
  date: 2021-10-29_07-36-34
  done: false
  episode_len_mean: 306.79
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.67900000000017
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4714
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.5051028754976061
          entropy_coeff: 0.009999999999999998
          kl: 0.011692007810612666
          policy_loss: -0.007484757569101122
          total_loss: 0.9195279253853692
          vf_explained_var: 0.26691925525665283
          vf_loss: 0.9349748618072934
    num_agent_steps_sampled: 1390000
    num_agent_steps_trained: 1390000
    num_steps_sampled: 1390000
    num_steps_trained: 1390000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1390,37804,1390000,-30.679,-20.1,-52.1,306.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1391000
  custom_metrics: {}
  date: 2021-10-29_07-36-54
  done: false
  episode_len_mean: 308.05
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.805000000000167
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 2
  episodes_total: 4716
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.6286794887648688
          entropy_coeff: 0.009999999999999998
          kl: 0.009286177938084547
          policy_loss: -0.04753089613384671
          total_loss: 0.590956629647149
          vf_explained_var: 0.15430554747581482
          vf_loss: 0.6491441304485003
    num_agent_steps_sampled: 1391000
    num_agent_steps_trained: 1391000
    num_steps_sampled: 1391000
    num_steps_trained: 1391000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1391,37824,1391000,-30.805,-20.1,-52.1,308.05


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1392000
  custom_metrics: {}
  date: 2021-10-29_07-37-18
  done: false
  episode_len_mean: 308.68
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -30.868000000000176
  episode_reward_min: -52.10000000000047
  episodes_this_iter: 3
  episodes_total: 4719
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.4266983376608955
          entropy_coeff: 0.009999999999999998
          kl: 0.009210343045950491
          policy_loss: -0.09746513962745666
          total_loss: 1.2765919599268172
          vf_explained_var: 0.3031570315361023
          vf_loss: 1.3827398671044244
    num_agent_steps_sampled: 1392000
    num_agent_steps_trained: 1392000
    num_steps_sampled: 1392000
    num_steps_trained: 1392000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1392,37847.5,1392000,-30.868,-20.1,-52.1,308.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1393000
  custom_metrics: {}
  date: 2021-10-29_07-37-39
  done: false
  episode_len_mean: 313.67
  episode_media: {}
  episode_reward_max: -20.600000000000023
  episode_reward_mean: -31.36700000000018
  episode_reward_min: -53.50000000000049
  episodes_this_iter: 3
  episodes_total: 4722
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.4355271246698167
          entropy_coeff: 0.009999999999999998
          kl: 0.011235079543291348
          policy_loss: 0.04505189036329587
          total_loss: 1.024833560321066
          vf_explained_var: -0.1140744537115097
          vf_loss: 0.9873251301960813
    num_agent_steps_sampled: 1393000
    num_agent_steps_trained: 1393000
    num_steps_sampled: 1393000
    num_steps_trained: 1393000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1393,37868.7,1393000,-31.367,-20.6,-53.5,313.67


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1394000
  custom_metrics: {}
  date: 2021-10-29_07-38-02
  done: false
  episode_len_mean: 314.96
  episode_media: {}
  episode_reward_max: -20.600000000000023
  episode_reward_mean: -31.496000000000187
  episode_reward_min: -53.50000000000049
  episodes_this_iter: 3
  episodes_total: 4725
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.412949674659305
          entropy_coeff: 0.009999999999999998
          kl: 0.006665507714373891
          policy_loss: 0.10235937891734971
          total_loss: 0.4402737625771099
          vf_explained_var: 0.18345525860786438
          vf_loss: 0.3480025964478652
    num_agent_steps_sampled: 1394000
    num_agent_steps_trained: 1394000
    num_steps_sampled: 1394000
    num_steps_trained: 1394000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1394,37891.5,1394000,-31.496,-20.6,-53.5,314.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1395000
  custom_metrics: {}
  date: 2021-10-29_07-38-17
  done: false
  episode_len_mean: 318.36
  episode_media: {}
  episode_reward_max: -20.600000000000023
  episode_reward_mean: -31.83600000000019
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 1
  episodes_total: 4726
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.7215807610087925
          entropy_coeff: 0.009999999999999998
          kl: 0.008077821355532479
          policy_loss: -0.07228130946556727
          total_loss: 0.5939843056930436
          vf_explained_var: -0.2702474296092987
          vf_loss: 0.6785838538780808
    num_agent_steps_sampled: 1395000
    num_agent_steps_trained: 1395000
    num_steps_sampled: 1395000
    num_steps_trained: 1395000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1395,37906.4,1395000,-31.836,-20.6,-59.2,318.36


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1396000
  custom_metrics: {}
  date: 2021-10-29_07-38-40
  done: false
  episode_len_mean: 323.63
  episode_media: {}
  episode_reward_max: -20.600000000000023
  episode_reward_mean: -32.36300000000019
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 3
  episodes_total: 4729
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.579696692360772
          entropy_coeff: 0.009999999999999998
          kl: 0.008553996182897766
          policy_loss: 0.037984886848264274
          total_loss: 1.2619469934039647
          vf_explained_var: -0.1760110706090927
          vf_loss: 1.2345728250013457
    num_agent_steps_sampled: 1396000
    num_agent_steps_trained: 1396000
    num_steps_sampled: 1396000
    num_steps_trained: 1396000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1396,37929.4,1396000,-32.363,-20.6,-59.2,323.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1397000
  custom_metrics: {}
  date: 2021-10-29_07-39-09
  done: false
  episode_len_mean: 321.47
  episode_media: {}
  episode_reward_max: -20.600000000000023
  episode_reward_mean: -32.14700000000019
  episode_reward_min: -59.20000000000057
  episodes_this_iter: 4
  episodes_total: 4733
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.3109321276346841
          entropy_coeff: 0.009999999999999998
          kl: 0.006430545247745516
          policy_loss: 0.025678551693757375
          total_loss: 0.9532674835787879
          vf_explained_var: 0.5741074085235596
          vf_loss: 0.9367994242244296
    num_agent_steps_sampled: 1397000
    num_agent_steps_trained: 1397000
    num_steps_sampled: 1397000
    num_steps_trained: 1397000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1397,37958.8,1397000,-32.147,-20.6,-59.2,321.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1398000
  custom_metrics: {}
  date: 2021-10-29_07-39-28
  done: false
  episode_len_mean: 321.6
  episode_media: {}
  episode_reward_max: -20.600000000000023
  episode_reward_mean: -32.16000000000018
  episode_reward_min: -60.60000000000059
  episodes_this_iter: 2
  episodes_total: 4735
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.420445966720581
          entropy_coeff: 0.009999999999999998
          kl: 0.009026231397163858
          policy_loss: 0.006721811162100898
          total_loss: 0.8012436429659525
          vf_explained_var: -0.23362351953983307
          vf_loss: 0.8032537114289072
    num_agent_steps_sampled: 1398000
    num_agent_steps_trained: 1398000
    num_steps_sampled: 1398000
    num_steps_trained: 1398000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1398,37977.6,1398000,-32.16,-20.6,-60.6,321.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1399000
  custom_metrics: {}
  date: 2021-10-29_07-39-45
  done: false
  episode_len_mean: 326.86
  episode_media: {}
  episode_reward_max: -20.600000000000023
  episode_reward_mean: -32.68600000000019
  episode_reward_min: -60.60000000000059
  episodes_this_iter: 2
  episodes_total: 4737
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.4279453939861722
          entropy_coeff: 0.009999999999999998
          kl: 0.006518697716219969
          policy_loss: 0.07621909280618032
          total_loss: 0.8520964725150002
          vf_explained_var: 0.059880632907152176
          vf_loss: 0.7862045573691527
    num_agent_steps_sampled: 1399000
    num_agent_steps_trained: 1399000
    num_steps_sampled: 1399000
    num_steps_trained: 1399000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1399,37994.7,1399000,-32.686,-20.6,-60.6,326.86




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1400000
  custom_metrics: {}
  date: 2021-10-29_07-40-29
  done: false
  episode_len_mean: 325.71
  episode_media: {}
  episode_reward_max: -20.600000000000023
  episode_reward_mean: -32.5710000000002
  episode_reward_min: -60.60000000000059
  episodes_this_iter: 4
  episodes_total: 4741
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.4058498515023126
          entropy_coeff: 0.009999999999999998
          kl: 0.008704508702090802
          policy_loss: -0.035417664133840135
          total_loss: 0.856881892018848
          vf_explained_var: 0.5116826891899109
          vf_loss: 0.9010805325375663
    num_agent_steps_sampled: 1400000
    num_agent_steps_trained: 1400000
    num_steps_sampled: 1400000
    num_steps_trained: 1400000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1400,38038.5,1400000,-32.571,-20.6,-60.6,325.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1401000
  custom_metrics: {}
  date: 2021-10-29_07-40-51
  done: false
  episode_len_mean: 328.27
  episode_media: {}
  episode_reward_max: -20.600000000000023
  episode_reward_mean: -32.8270000000002
  episode_reward_min: -60.60000000000059
  episodes_this_iter: 2
  episodes_total: 4743
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.4589658617973327
          entropy_coeff: 0.009999999999999998
          kl: 0.008939984994025884
          policy_loss: -0.16177651310960453
          total_loss: 0.868632676700751
          vf_explained_var: 0.166781485080719
          vf_loss: 1.0395785588357183
    num_agent_steps_sampled: 1401000
    num_agent_steps_trained: 1401000
    num_steps_sampled: 1401000
    num_steps_trained: 1401000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1401,38061.1,1401000,-32.827,-20.6,-60.6,328.27


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1402000
  custom_metrics: {}
  date: 2021-10-29_07-41-20
  done: false
  episode_len_mean: 326.14
  episode_media: {}
  episode_reward_max: -20.600000000000023
  episode_reward_mean: -32.61400000000019
  episode_reward_min: -60.60000000000059
  episodes_this_iter: 4
  episodes_total: 4747
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.333366244369083
          entropy_coeff: 0.009999999999999998
          kl: 0.007831459766878544
          policy_loss: -0.02960612831844224
          total_loss: 1.2534759057892693
          vf_explained_var: 0.4557396471500397
          vf_loss: 1.291667507092158
    num_agent_steps_sampled: 1402000
    num_agent_steps_trained: 1402000
    num_steps_sampled: 1402000
    num_steps_trained: 1402000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1402,38089.3,1402000,-32.614,-20.6,-60.6,326.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1403000
  custom_metrics: {}
  date: 2021-10-29_07-41-41
  done: false
  episode_len_mean: 329.72
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -32.9720000000002
  episode_reward_min: -60.60000000000059
  episodes_this_iter: 3
  episodes_total: 4750
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.3190168248282539
          entropy_coeff: 0.009999999999999998
          kl: 0.008694217190805868
          policy_loss: -0.03766168488396539
          total_loss: 0.9836451143026352
          vf_explained_var: -0.13224920630455017
          vf_loss: 1.0292256908284294
    num_agent_steps_sampled: 1403000
    num_agent_steps_trained: 1403000
    num_steps_sampled: 1403000
    num_steps_trained: 1403000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1403,38110.3,1403000,-32.972,-22.2,-60.6,329.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1404000
  custom_metrics: {}
  date: 2021-10-29_07-41-58
  done: false
  episode_len_mean: 333.34
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -33.3340000000002
  episode_reward_min: -64.10000000000063
  episodes_this_iter: 2
  episodes_total: 4752
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.3005805439419216
          entropy_coeff: 0.009999999999999998
          kl: 0.0091106924376771
          policy_loss: 0.06704582671324412
          total_loss: 0.68242919213242
          vf_explained_var: -0.6172638535499573
          vf_loss: 0.6228653727720181
    num_agent_steps_sampled: 1404000
    num_agent_steps_trained: 1404000
    num_steps_sampled: 1404000
    num_steps_trained: 1404000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1404,38128,1404000,-33.334,-22.2,-64.1,333.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1405000
  custom_metrics: {}
  date: 2021-10-29_07-42-12
  done: false
  episode_len_mean: 337.15
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -33.7150000000002
  episode_reward_min: -64.40000000000062
  episodes_this_iter: 1
  episodes_total: 4753
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6062972134579693
          cur_lr: 5.000000000000001e-05
          entropy: 1.2614986737569174
          entropy_coeff: 0.009999999999999998
          kl: 0.004967581562733159
          policy_loss: -0.05542771029803488
          total_loss: 0.5807458013296127
          vf_explained_var: -0.35232701897621155
          vf_loss: 0.6457766658729978
    num_agent_steps_sampled: 1405000
    num_agent_steps_trained: 1405000
    num_steps_sampled: 1405000
    num_steps_trained: 1405000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1405,38141.7,1405000,-33.715,-22.2,-64.4,337.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1406000
  custom_metrics: {}
  date: 2021-10-29_07-42-28
  done: false
  episode_len_mean: 343.91
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -34.39100000000021
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 2
  episodes_total: 4755
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30314860672898464
          cur_lr: 5.000000000000001e-05
          entropy: 1.2552564024925232
          entropy_coeff: 0.009999999999999998
          kl: 0.0053083860079582955
          policy_loss: 0.0679181393649843
          total_loss: 0.7654842343595293
          vf_explained_var: -0.35064998269081116
          vf_loss: 0.7085094213899639
    num_agent_steps_sampled: 1406000
    num_agent_steps_trained: 1406000
    num_steps_sampled: 1406000
    num_steps_trained: 1406000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1406,38157.4,1406000,-34.391,-22.2,-66.8,343.91


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1407000
  custom_metrics: {}
  date: 2021-10-29_07-42-55
  done: false
  episode_len_mean: 342.19
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -34.219000000000214
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4758
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30314860672898464
          cur_lr: 5.000000000000001e-05
          entropy: 1.5319307300779554
          entropy_coeff: 0.009999999999999998
          kl: 0.011684604553650502
          policy_loss: -0.024676970971955193
          total_loss: 1.0834263917472629
          vf_explained_var: 0.17477355897426605
          vf_loss: 1.119880493481954
    num_agent_steps_sampled: 1407000
    num_agent_steps_trained: 1407000
    num_steps_sampled: 1407000
    num_steps_trained: 140700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1407,38184.2,1407000,-34.219,-22.2,-66.8,342.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1408000
  custom_metrics: {}
  date: 2021-10-29_07-43-15
  done: false
  episode_len_mean: 345.19
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -34.51900000000021
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4761
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30314860672898464
          cur_lr: 5.000000000000001e-05
          entropy: 1.2311695244577197
          entropy_coeff: 0.009999999999999998
          kl: 0.011642182917568848
          policy_loss: 0.05176170915365219
          total_loss: 1.0458534641398325
          vf_explained_var: 0.2362566888332367
          vf_loss: 1.0028741261611382
    num_agent_steps_sampled: 1408000
    num_agent_steps_trained: 1408000
    num_steps_sampled: 1408000
    num_steps_trained: 1408000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1408,38204.2,1408000,-34.519,-22.2,-66.8,345.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1409000
  custom_metrics: {}
  date: 2021-10-29_07-43-45
  done: false
  episode_len_mean: 344.29
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -34.429000000000215
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4764
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30314860672898464
          cur_lr: 5.000000000000001e-05
          entropy: 1.0864019884003533
          entropy_coeff: 0.009999999999999998
          kl: 0.008542253299863875
          policy_loss: -0.11124661482042736
          total_loss: 1.0182815316650602
          vf_explained_var: 0.3510150611400604
          vf_loss: 1.1378025899330775
    num_agent_steps_sampled: 1409000
    num_agent_steps_trained: 1409000
    num_steps_sampled: 1409000
    num_steps_trained: 1409000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1409,38234.8,1409000,-34.429,-22.2,-66.8,344.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1410000
  custom_metrics: {}
  date: 2021-10-29_07-44-06
  done: false
  episode_len_mean: 345.0
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -34.50000000000022
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4767
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30314860672898464
          cur_lr: 5.000000000000001e-05
          entropy: 1.1305726329485575
          entropy_coeff: 0.009999999999999998
          kl: 0.012313903451974539
          policy_loss: -0.033753783421383965
          total_loss: 0.7884219229221344
          vf_explained_var: 0.5461090803146362
          vf_loss: 0.8297484861479865
    num_agent_steps_sampled: 1410000
    num_agent_steps_trained: 1410000
    num_steps_sampled: 1410000
    num_steps_trained: 1410000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1410,38255.2,1410000,-34.5,-22.2,-66.8,345


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1411000
  custom_metrics: {}
  date: 2021-10-29_07-44-23
  done: false
  episode_len_mean: 350.09
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -35.00900000000022
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 2
  episodes_total: 4769
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30314860672898464
          cur_lr: 5.000000000000001e-05
          entropy: 1.20187087059021
          entropy_coeff: 0.009999999999999998
          kl: 0.008121665353000178
          policy_loss: 0.06047887371646033
          total_loss: 0.7114947065711021
          vf_explained_var: 0.5459899306297302
          vf_loss: 0.6605724656126566
    num_agent_steps_sampled: 1411000
    num_agent_steps_trained: 1411000
    num_steps_sampled: 1411000
    num_steps_trained: 1411000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1411,38272.6,1411000,-35.009,-22.2,-66.8,350.09




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1412000
  custom_metrics: {}
  date: 2021-10-29_07-45-06
  done: false
  episode_len_mean: 350.09
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -35.00900000000022
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4772
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30314860672898464
          cur_lr: 5.000000000000001e-05
          entropy: 1.1735359973377653
          entropy_coeff: 0.009999999999999998
          kl: 0.01101214210868875
          policy_loss: -0.023814164102077484
          total_loss: 0.8526724603441026
          vf_explained_var: 0.45588383078575134
          vf_loss: 0.8848836733235254
    num_agent_steps_sampled: 1412000
    num_agent_steps_trained: 1412000
    num_steps_sampled: 1412000
    num_steps_trained: 1412000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1412,38315,1412000,-35.009,-22.2,-66.8,350.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1413000
  custom_metrics: {}
  date: 2021-10-29_07-45-31
  done: false
  episode_len_mean: 347.43
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -34.74300000000022
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4775
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30314860672898464
          cur_lr: 5.000000000000001e-05
          entropy: 1.0108868883715736
          entropy_coeff: 0.009999999999999998
          kl: 0.011233595823298753
          policy_loss: -0.06100745888219939
          total_loss: 0.9345527897278468
          vf_explained_var: 0.43061062693595886
          vf_loss: 1.0022636688417859
    num_agent_steps_sampled: 1413000
    num_agent_steps_trained: 1413000
    num_steps_sampled: 1413000
    num_steps_trained: 1413000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1413,38340.7,1413000,-34.743,-22.2,-66.8,347.43


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1414000
  custom_metrics: {}
  date: 2021-10-29_07-45-54
  done: false
  episode_len_mean: 351.5
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -35.150000000000226
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4778
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30314860672898464
          cur_lr: 5.000000000000001e-05
          entropy: 0.9507194704479641
          entropy_coeff: 0.009999999999999998
          kl: 0.007413621408004742
          policy_loss: 0.012659292254183028
          total_loss: 1.4320117712020874
          vf_explained_var: 0.2658770680427551
          vf_loss: 1.4266122605237697
    num_agent_steps_sampled: 1414000
    num_agent_steps_trained: 1414000
    num_steps_sampled: 1414000
    num_steps_trained: 1414000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1414,38363.2,1414000,-35.15,-22.2,-66.8,351.5


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1415000
  custom_metrics: {}
  date: 2021-10-29_07-46-18
  done: false
  episode_len_mean: 352.08
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -35.208000000000226
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4781
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30314860672898464
          cur_lr: 5.000000000000001e-05
          entropy: 0.9578453063964844
          entropy_coeff: 0.009999999999999998
          kl: 0.004012080435383838
          policy_loss: 0.031132947901884714
          total_loss: 0.7327425753076872
          vf_explained_var: 0.439441978931427
          vf_loss: 0.7099718269374635
    num_agent_steps_sampled: 1415000
    num_agent_steps_trained: 1415000
    num_steps_sampled: 1415000
    num_steps_trained: 1415000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1415,38387.5,1415000,-35.208,-22.2,-66.8,352.08


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1416000
  custom_metrics: {}
  date: 2021-10-29_07-46-47
  done: false
  episode_len_mean: 350.97
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -35.09700000000023
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 4
  episodes_total: 4785
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15157430336449232
          cur_lr: 5.000000000000001e-05
          entropy: 0.8354330791367425
          entropy_coeff: 0.009999999999999998
          kl: 0.02262651260272324
          policy_loss: 0.036709170871310766
          total_loss: 1.139775138762262
          vf_explained_var: 0.5493389964103699
          vf_loss: 1.1079906904035144
    num_agent_steps_sampled: 1416000
    num_agent_steps_trained: 1416000
    num_steps_sampled: 1416000
    num_steps_trained: 1416000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1416,38416,1416000,-35.097,-22.2,-66.8,350.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1417000
  custom_metrics: {}
  date: 2021-10-29_07-47-12
  done: false
  episode_len_mean: 348.06
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -34.806000000000225
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4788
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22736145504673852
          cur_lr: 5.000000000000001e-05
          entropy: 0.8716034286552006
          entropy_coeff: 0.009999999999999998
          kl: 0.00532417209848701
          policy_loss: 0.013277202844619751
          total_loss: 1.1281210001971986
          vf_explained_var: 0.05478242412209511
          vf_loss: 1.1223493280924028
    num_agent_steps_sampled: 1417000
    num_agent_steps_trained: 1417000
    num_steps_sampled: 1417000
    num_steps_trained: 1417000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1417,38441.8,1417000,-34.806,-22.2,-66.8,348.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1418000
  custom_metrics: {}
  date: 2021-10-29_07-47-42
  done: false
  episode_len_mean: 346.09
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -34.60900000000022
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 4
  episodes_total: 4792
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22736145504673852
          cur_lr: 5.000000000000001e-05
          entropy: 0.9495669980843862
          entropy_coeff: 0.009999999999999998
          kl: 0.005733420349382199
          policy_loss: -0.006287486685646905
          total_loss: 1.2574058439996507
          vf_explained_var: 0.3133273422718048
          vf_loss: 1.2718854354487525
    num_agent_steps_sampled: 1418000
    num_agent_steps_trained: 1418000
    num_steps_sampled: 1418000
    num_steps_trained: 1418000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1418,38471.7,1418000,-34.609,-22.2,-66.8,346.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1419000
  custom_metrics: {}
  date: 2021-10-29_07-48-08
  done: false
  episode_len_mean: 346.03
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -34.60300000000022
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4795
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22736145504673852
          cur_lr: 5.000000000000001e-05
          entropy: 0.9484841565291087
          entropy_coeff: 0.009999999999999998
          kl: 0.01242168693464569
          policy_loss: 0.025016549064053428
          total_loss: 1.0979682607783212
          vf_explained_var: 0.015923190861940384
          vf_loss: 1.0796123533613151
    num_agent_steps_sampled: 1419000
    num_agent_steps_trained: 1419000
    num_steps_sampled: 1419000
    num_steps_trained: 1419000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1419,38497.5,1419000,-34.603,-22.2,-66.8,346.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1420000
  custom_metrics: {}
  date: 2021-10-29_07-48-42
  done: false
  episode_len_mean: 345.08
  episode_media: {}
  episode_reward_max: -22.200000000000045
  episode_reward_mean: -34.508000000000216
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 4
  episodes_total: 4799
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.22736145504673852
          cur_lr: 5.000000000000001e-05
          entropy: 0.8118210123644934
          entropy_coeff: 0.009999999999999998
          kl: 0.0047669272914149805
          policy_loss: 0.02714353957109981
          total_loss: 1.1364974170923232
          vf_explained_var: 0.2949962615966797
          vf_loss: 1.11638825668229
    num_agent_steps_sampled: 1420000
    num_agent_steps_trained: 1420000
    num_steps_sampled: 1420000
    num_steps_trained: 1420000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1420,38531.4,1420000,-34.508,-22.2,-66.8,345.08




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1421000
  custom_metrics: {}
  date: 2021-10-29_07-49-29
  done: false
  episode_len_mean: 342.43
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -34.243000000000215
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 4
  episodes_total: 4803
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11368072752336926
          cur_lr: 5.000000000000001e-05
          entropy: 0.8734541396299998
          entropy_coeff: 0.009999999999999998
          kl: 0.02517035846760578
          policy_loss: 0.0349842492904928
          total_loss: 1.2663244558705224
          vf_explained_var: 0.5966282486915588
          vf_loss: 1.2372133662303288
    num_agent_steps_sampled: 1421000
    num_agent_steps_trained: 1421000
    num_steps_sampled: 1421000
    num_steps_trained: 1421000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1421,38578.4,1421000,-34.243,-19.5,-66.8,342.43


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1422000
  custom_metrics: {}
  date: 2021-10-29_07-49-59
  done: false
  episode_len_mean: 344.01
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -34.40100000000022
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 4
  episodes_total: 4807
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17052109128505386
          cur_lr: 5.000000000000001e-05
          entropy: 0.7105474061436123
          entropy_coeff: 0.009999999999999998
          kl: 0.015239359530331203
          policy_loss: -0.13420217517349456
          total_loss: 1.3213150527742175
          vf_explained_var: 0.2686552405357361
          vf_loss: 1.4600240773624844
    num_agent_steps_sampled: 1422000
    num_agent_steps_trained: 1422000
    num_steps_sampled: 1422000
    num_steps_trained: 1422000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1422,38608.3,1422000,-34.401,-19.5,-66.8,344.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1423000
  custom_metrics: {}
  date: 2021-10-29_07-50-28
  done: false
  episode_len_mean: 341.44
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -34.14400000000021
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 4
  episodes_total: 4811
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17052109128505386
          cur_lr: 5.000000000000001e-05
          entropy: 0.6334833469655778
          entropy_coeff: 0.009999999999999998
          kl: 0.009770907593666797
          policy_loss: -0.06775832639800178
          total_loss: 0.7050470742914412
          vf_explained_var: 0.735572874546051
          vf_loss: 0.7774740990665224
    num_agent_steps_sampled: 1423000
    num_agent_steps_trained: 1423000
    num_steps_sampled: 1423000
    num_steps_trained: 1423000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1423,38637.1,1423000,-34.144,-19.5,-66.8,341.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1424000
  custom_metrics: {}
  date: 2021-10-29_07-51-00
  done: false
  episode_len_mean: 336.14
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -33.6140000000002
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 4
  episodes_total: 4815
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17052109128505386
          cur_lr: 5.000000000000001e-05
          entropy: 0.6210505770312416
          entropy_coeff: 0.009999999999999998
          kl: 0.012846362489971045
          policy_loss: -0.011889868312411837
          total_loss: 0.7858099506960975
          vf_explained_var: 0.4132698178291321
          vf_loss: 0.8017197493049834
    num_agent_steps_sampled: 1424000
    num_agent_steps_trained: 1424000
    num_steps_sampled: 1424000
    num_steps_trained: 1424000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1424,38669,1424000,-33.614,-19.5,-66.8,336.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1425000
  custom_metrics: {}
  date: 2021-10-29_07-51-29
  done: false
  episode_len_mean: 333.63
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -33.3630000000002
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 4
  episodes_total: 4819
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17052109128505386
          cur_lr: 5.000000000000001e-05
          entropy: 0.5304964042372173
          entropy_coeff: 0.009999999999999998
          kl: 0.007606007598076777
          policy_loss: 0.15414780601859093
          total_loss: 1.0090303467379675
          vf_explained_var: 0.48112139105796814
          vf_loss: 0.858890516228146
    num_agent_steps_sampled: 1425000
    num_agent_steps_trained: 1425000
    num_steps_sampled: 1425000
    num_steps_trained: 1425000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1425,38698.2,1425000,-33.363,-19.5,-66.8,333.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1426000
  custom_metrics: {}
  date: 2021-10-29_07-52-00
  done: false
  episode_len_mean: 328.01
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -32.801000000000194
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 4
  episodes_total: 4823
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17052109128505386
          cur_lr: 5.000000000000001e-05
          entropy: 0.4786124676465988
          entropy_coeff: 0.009999999999999998
          kl: 0.01390024921927867
          policy_loss: -0.03760364204645157
          total_loss: 0.898358886109458
          vf_explained_var: 0.5057039260864258
          vf_loss: 0.9383783691459232
    num_agent_steps_sampled: 1426000
    num_agent_steps_trained: 1426000
    num_steps_sampled: 1426000
    num_steps_trained: 1426000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1426,38729.1,1426000,-32.801,-19.5,-66.8,328.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1427000
  custom_metrics: {}
  date: 2021-10-29_07-52-28
  done: false
  episode_len_mean: 322.7
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -32.27000000000019
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4826
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17052109128505386
          cur_lr: 5.000000000000001e-05
          entropy: 0.5180508269204034
          entropy_coeff: 0.009999999999999998
          kl: 0.009462881335279387
          policy_loss: 0.043899589942561254
          total_loss: 0.8669794744915432
          vf_explained_var: 0.6135449409484863
          vf_loss: 0.8266467698746257
    num_agent_steps_sampled: 1427000
    num_agent_steps_trained: 1427000
    num_steps_sampled: 1427000
    num_steps_trained: 1427000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1427,38757.6,1427000,-32.27,-19.5,-66.8,322.7




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1428000
  custom_metrics: {}
  date: 2021-10-29_07-53-18
  done: false
  episode_len_mean: 315.8
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -31.580000000000176
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 5
  episodes_total: 4831
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17052109128505386
          cur_lr: 5.000000000000001e-05
          entropy: 0.5416119671530194
          entropy_coeff: 0.009999999999999998
          kl: 0.023632577284703003
          policy_loss: -0.01668672627872891
          total_loss: 1.0024100131458706
          vf_explained_var: 0.5019473433494568
          vf_loss: 1.0204829924636416
    num_agent_steps_sampled: 1428000
    num_agent_steps_trained: 1428000
    num_steps_sampled: 1428000
    num_steps_trained: 1428000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1428,38807.5,1428000,-31.58,-19.5,-66.8,315.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1429000
  custom_metrics: {}
  date: 2021-10-29_07-53-56
  done: false
  episode_len_mean: 312.51
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -31.25100000000017
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4834
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2557816369275807
          cur_lr: 5.000000000000001e-05
          entropy: 0.46557195981343585
          entropy_coeff: 0.009999999999999998
          kl: 0.012300862861445062
          policy_loss: -0.09571625399920676
          total_loss: 0.7788066109849348
          vf_explained_var: 0.353213369846344
          vf_loss: 0.8760322315825356
    num_agent_steps_sampled: 1429000
    num_agent_steps_trained: 1429000
    num_steps_sampled: 1429000
    num_steps_trained: 1429000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1429,38844.8,1429000,-31.251,-19.5,-66.8,312.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1430000
  custom_metrics: {}
  date: 2021-10-29_07-54-19
  done: false
  episode_len_mean: 307.82
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -30.782000000000163
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 4
  episodes_total: 4838
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2557816369275807
          cur_lr: 5.000000000000001e-05
          entropy: 0.6521035472551981
          entropy_coeff: 0.009999999999999998
          kl: 0.006845909640758179
          policy_loss: 0.03204464746846093
          total_loss: 1.1895383881198036
          vf_explained_var: 0.27428895235061646
          vf_loss: 1.162263712949223
    num_agent_steps_sampled: 1430000
    num_agent_steps_trained: 1430000
    num_steps_sampled: 1430000
    num_steps_trained: 1430000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1430,38868.2,1430000,-30.782,-19.5,-66.8,307.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1431000
  custom_metrics: {}
  date: 2021-10-29_07-54-50
  done: false
  episode_len_mean: 304.46
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -30.44600000000016
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 4
  episodes_total: 4842
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2557816369275807
          cur_lr: 5.000000000000001e-05
          entropy: 0.37786764204502105
          entropy_coeff: 0.009999999999999998
          kl: 0.019669020471567773
          policy_loss: -0.014852488454845216
          total_loss: 0.6758574611610837
          vf_explained_var: 0.6159171462059021
          vf_loss: 0.6894576519727706
    num_agent_steps_sampled: 1431000
    num_agent_steps_trained: 1431000
    num_steps_sampled: 1431000
    num_steps_trained: 1431000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1431,38899.3,1431000,-30.446,-19.5,-66.8,304.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1432000
  custom_metrics: {}
  date: 2021-10-29_07-55-15
  done: false
  episode_len_mean: 305.4
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -30.540000000000163
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4845
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2557816369275807
          cur_lr: 5.000000000000001e-05
          entropy: 0.7477766358190112
          entropy_coeff: 0.009999999999999998
          kl: 0.013661039981983219
          policy_loss: 0.011573251999086804
          total_loss: 0.7903364155027601
          vf_explained_var: 0.2289135605096817
          vf_loss: 0.7827466891043716
    num_agent_steps_sampled: 1432000
    num_agent_steps_trained: 1432000
    num_steps_sampled: 1432000
    num_steps_trained: 1432000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1432,38924.3,1432000,-30.54,-19.5,-66.8,305.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1433000
  custom_metrics: {}
  date: 2021-10-29_07-55-35
  done: false
  episode_len_mean: 307.72
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -30.772000000000165
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 2
  episodes_total: 4847
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2557816369275807
          cur_lr: 5.000000000000001e-05
          entropy: 0.8507608864042494
          entropy_coeff: 0.009999999999999998
          kl: 0.0245037235999363
          policy_loss: -0.008437399152252409
          total_loss: 0.7295553103089333
          vf_explained_var: -0.029225116595625877
          vf_loss: 0.7402327164593671
    num_agent_steps_sampled: 1433000
    num_agent_steps_trained: 1433000
    num_steps_sampled: 1433000
    num_steps_trained: 143300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1433,38944.2,1433000,-30.772,-19.5,-66.8,307.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1434000
  custom_metrics: {}
  date: 2021-10-29_07-55-56
  done: false
  episode_len_mean: 307.04
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -30.70400000000016
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4850
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3836724553913712
          cur_lr: 5.000000000000001e-05
          entropy: 0.6324337697691388
          entropy_coeff: 0.009999999999999998
          kl: 0.007388834343478992
          policy_loss: -0.05571737065911293
          total_loss: 1.1290145927005344
          vf_explained_var: 0.3867679536342621
          vf_loss: 1.1882214195198484
    num_agent_steps_sampled: 1434000
    num_agent_steps_trained: 1434000
    num_steps_sampled: 1434000
    num_steps_trained: 1434000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1434,38965.3,1434000,-30.704,-19.5,-66.8,307.04


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1435000
  custom_metrics: {}
  date: 2021-10-29_07-56-20
  done: false
  episode_len_mean: 302.12
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -30.212000000000153
  episode_reward_min: -66.80000000000048
  episodes_this_iter: 3
  episodes_total: 4853
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3836724553913712
          cur_lr: 5.000000000000001e-05
          entropy: 0.5916286459399595
          entropy_coeff: 0.009999999999999998
          kl: 0.0038245272953885334
          policy_loss: -0.1229562251104249
          total_loss: 1.308248840437995
          vf_explained_var: 0.12275394797325134
          vf_loss: 1.4356539752748279
    num_agent_steps_sampled: 1435000
    num_agent_steps_trained: 1435000
    num_steps_sampled: 1435000
    num_steps_trained: 1435000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1435,38989.2,1435000,-30.212,-19.5,-66.8,302.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1436000
  custom_metrics: {}
  date: 2021-10-29_07-56-46
  done: false
  episode_len_mean: 294.46
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -29.446000000000154
  episode_reward_min: -59.30000000000057
  episodes_this_iter: 4
  episodes_total: 4857
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1918362276956856
          cur_lr: 5.000000000000001e-05
          entropy: 0.5661859641472499
          entropy_coeff: 0.009999999999999998
          kl: 0.010124151733447523
          policy_loss: -0.06013648824559318
          total_loss: 0.8347067150804731
          vf_explained_var: 0.5494269728660583
          vf_loss: 0.8985628770457373
    num_agent_steps_sampled: 1436000
    num_agent_steps_trained: 1436000
    num_steps_sampled: 1436000
    num_steps_trained: 1436000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1436,39014.6,1436000,-29.446,-19.5,-59.3,294.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1437000
  custom_metrics: {}
  date: 2021-10-29_07-57-06
  done: false
  episode_len_mean: 296.77
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -29.677000000000152
  episode_reward_min: -59.30000000000057
  episodes_this_iter: 2
  episodes_total: 4859
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1918362276956856
          cur_lr: 5.000000000000001e-05
          entropy: 0.8425527917014228
          entropy_coeff: 0.009999999999999998
          kl: 0.03302331754776241
          policy_loss: -0.07969834986660215
          total_loss: 1.7126643548409144
          vf_explained_var: 0.38787391781806946
          vf_loss: 1.7944531778494517
    num_agent_steps_sampled: 1437000
    num_agent_steps_trained: 1437000
    num_steps_sampled: 1437000
    num_steps_trained: 1437000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1437,39035.3,1437000,-29.677,-19.5,-59.3,296.77




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1438000
  custom_metrics: {}
  date: 2021-10-29_07-57-45
  done: false
  episode_len_mean: 295.08
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -29.50800000000015
  episode_reward_min: -56.000000000000526
  episodes_this_iter: 3
  episodes_total: 4862
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28775434154352836
          cur_lr: 5.000000000000001e-05
          entropy: 0.6084544453356001
          entropy_coeff: 0.009999999999999998
          kl: 0.003357661676466724
          policy_loss: -0.06320923964182536
          total_loss: 1.1496462570296393
          vf_explained_var: 0.37971705198287964
          vf_loss: 1.2179738554689619
    num_agent_steps_sampled: 1438000
    num_agent_steps_trained: 1438000
    num_steps_sampled: 1438000
    num_steps_trained: 143800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1438,39073.9,1438000,-29.508,-19.5,-56,295.08


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1439000
  custom_metrics: {}
  date: 2021-10-29_07-57-59
  done: false
  episode_len_mean: 302.16
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -30.21600000000015
  episode_reward_min: -74.80000000000003
  episodes_this_iter: 2
  episodes_total: 4864
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14387717077176418
          cur_lr: 5.000000000000001e-05
          entropy: 0.7999836881955464
          entropy_coeff: 0.009999999999999998
          kl: 0.01292069648995514
          policy_loss: 0.14542216898666488
          total_loss: 0.5953141012953387
          vf_explained_var: 0.27704232931137085
          vf_loss: 0.4560327776643034
    num_agent_steps_sampled: 1439000
    num_agent_steps_trained: 1439000
    num_steps_sampled: 1439000
    num_steps_trained: 1439000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1439,39087.7,1439000,-30.216,-19.5,-74.8,302.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1440000
  custom_metrics: {}
  date: 2021-10-29_07-58-10
  done: false
  episode_len_mean: 309.27
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -30.927000000000135
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 1
  episodes_total: 4865
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14387717077176418
          cur_lr: 5.000000000000001e-05
          entropy: 0.12941607944667338
          entropy_coeff: 0.009999999999999998
          kl: 0.006511912660286731
          policy_loss: 0.18773241109318203
          total_loss: 0.19973303228616715
          vf_explained_var: -0.533149778842926
          vf_loss: 0.012357866169056958
    num_agent_steps_sampled: 1440000
    num_agent_steps_trained: 1440000
    num_steps_sampled: 1440000
    num_steps_trained: 1440

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1440,39099.1,1440000,-30.927,-19.5,-97.6,309.27


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1441000
  custom_metrics: {}
  date: 2021-10-29_07-58-41
  done: false
  episode_len_mean: 301.15
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -30.115000000000123
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 4
  episodes_total: 4869
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14387717077176418
          cur_lr: 5.000000000000001e-05
          entropy: 0.18973444948593776
          entropy_coeff: 0.009999999999999998
          kl: 0.0015979606616253268
          policy_loss: -0.0015667335854636299
          total_loss: 1.4396874017185635
          vf_explained_var: 0.21327830851078033
          vf_loss: 1.4429215603404575
    num_agent_steps_sampled: 1441000
    num_agent_steps_trained: 1441000
    num_steps_sampled: 1441000
    num_steps_trained: 14

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1441,39130.1,1441000,-30.115,-19.5,-97.6,301.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1442000
  custom_metrics: {}
  date: 2021-10-29_07-59-09
  done: false
  episode_len_mean: 298.3
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -29.83000000000012
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 4
  episodes_total: 4873
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07193858538588209
          cur_lr: 5.000000000000001e-05
          entropy: 0.4204150115450223
          entropy_coeff: 0.009999999999999998
          kl: 0.01747729084616528
          policy_loss: 0.0664104829231898
          total_loss: 1.241995029979282
          vf_explained_var: 0.31626176834106445
          vf_loss: 1.1785314189063179
    num_agent_steps_sampled: 1442000
    num_agent_steps_trained: 1442000
    num_steps_sampled: 1442000
    num_steps_trained: 1442000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1442,39157.7,1442000,-29.83,-19.5,-97.6,298.3


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1443000
  custom_metrics: {}
  date: 2021-10-29_07-59-37
  done: false
  episode_len_mean: 297.76
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -29.77600000000012
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 3
  episodes_total: 4876
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07193858538588209
          cur_lr: 5.000000000000001e-05
          entropy: 0.575910226504008
          entropy_coeff: 0.009999999999999998
          kl: 0.0356070659228272
          policy_loss: -0.038284913036558364
          total_loss: 0.9656474169757631
          vf_explained_var: 0.35289379954338074
          vf_loss: 1.0071299112505383
    num_agent_steps_sampled: 1443000
    num_agent_steps_trained: 1443000
    num_steps_sampled: 1443000
    num_steps_trained: 1443000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1443,39185.4,1443000,-29.776,-19.5,-97.6,297.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1444000
  custom_metrics: {}
  date: 2021-10-29_08-00-03
  done: false
  episode_len_mean: 294.3
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -29.430000000000117
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 4
  episodes_total: 4880
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10790787807882316
          cur_lr: 5.000000000000001e-05
          entropy: 0.29948411534229913
          entropy_coeff: 0.009999999999999998
          kl: 0.006868934599334864
          policy_loss: 0.0313482902944088
          total_loss: 1.105021721786923
          vf_explained_var: 0.27244147658348083
          vf_loss: 1.075927056123813
    num_agent_steps_sampled: 1444000
    num_agent_steps_trained: 1444000
    num_steps_sampled: 1444000
    num_steps_trained: 1444000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1444,39211.4,1444000,-29.43,-19.5,-97.6,294.3


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1445000
  custom_metrics: {}
  date: 2021-10-29_08-00-34
  done: false
  episode_len_mean: 292.45
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -29.245000000000115
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 4
  episodes_total: 4884
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10790787807882316
          cur_lr: 5.000000000000001e-05
          entropy: 0.23984948789907826
          entropy_coeff: 0.009999999999999998
          kl: 0.005310362995977346
          policy_loss: 0.0428819853398535
          total_loss: 1.4601782606707678
          vf_explained_var: 0.4219922423362732
          vf_loss: 1.4191217420829667
    num_agent_steps_sampled: 1445000
    num_agent_steps_trained: 1445000
    num_steps_sampled: 1445000
    num_steps_trained: 1445000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1445,39242.4,1445000,-29.245,-19.5,-97.6,292.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1446000
  custom_metrics: {}
  date: 2021-10-29_08-01-02
  done: false
  episode_len_mean: 292.19
  episode_media: {}
  episode_reward_max: -19.500000000000007
  episode_reward_mean: -29.219000000000115
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 3
  episodes_total: 4887
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.10790787807882316
          cur_lr: 5.000000000000001e-05
          entropy: 0.34990998804569245
          entropy_coeff: 0.009999999999999998
          kl: 0.0036885928768811025
          policy_loss: -0.039632803367243875
          total_loss: 0.8859382695621915
          vf_explained_var: 0.5772346258163452
          vf_loss: 0.928672147459454
    num_agent_steps_sampled: 1446000
    num_agent_steps_trained: 1446000
    num_steps_sampled: 1446000
    num_steps_trained: 14460

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1446,39270.4,1446000,-29.219,-19.5,-97.6,292.19




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1447000
  custom_metrics: {}
  date: 2021-10-29_08-01-50
  done: false
  episode_len_mean: 290.49
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -29.049000000000113
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 5
  episodes_total: 4892
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.05395393903941158
          cur_lr: 5.000000000000001e-05
          entropy: 0.6200336722036203
          entropy_coeff: 0.009999999999999998
          kl: 0.022938542672228675
          policy_loss: 0.0906068175824152
          total_loss: 1.0931103103690678
          vf_explained_var: 0.5908664464950562
          vf_loss: 1.0074661870797474
    num_agent_steps_sampled: 1447000
    num_agent_steps_trained: 1447000
    num_steps_sampled: 1447000
    num_steps_trained: 1447000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1447,39318.3,1447000,-29.049,-17.9,-97.6,290.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1448000
  custom_metrics: {}
  date: 2021-10-29_08-02-12
  done: false
  episode_len_mean: 291.38
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -29.138000000000105
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 2
  episodes_total: 4894
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08093090855911735
          cur_lr: 5.000000000000001e-05
          entropy: 1.044869609673818
          entropy_coeff: 0.009999999999999998
          kl: 0.05700610870175953
          policy_loss: -0.06574901789426804
          total_loss: 1.0215233021312289
          vf_explained_var: 0.19204512238502502
          vf_loss: 1.093107445538044
    num_agent_steps_sampled: 1448000
    num_agent_steps_trained: 1448000
    num_steps_sampled: 1448000
    num_steps_trained: 1448000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1448,39341,1448000,-29.138,-17.9,-97.6,291.38


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1449000
  custom_metrics: {}
  date: 2021-10-29_08-02-39
  done: false
  episode_len_mean: 293.68
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -29.368000000000112
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 4
  episodes_total: 4898
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.12139636283867603
          cur_lr: 5.000000000000001e-05
          entropy: 1.1305843181080288
          entropy_coeff: 0.009999999999999998
          kl: 0.0636118732279488
          policy_loss: 0.016812861545218362
          total_loss: 1.315760350227356
          vf_explained_var: 0.28084641695022583
          vf_loss: 1.3025310807757908
    num_agent_steps_sampled: 1449000
    num_agent_steps_trained: 1449000
    num_steps_sampled: 1449000
    num_steps_trained: 1449000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1449,39368,1449000,-29.368,-17.9,-97.6,293.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1450000
  custom_metrics: {}
  date: 2021-10-29_08-03-01
  done: false
  episode_len_mean: 296.79
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -29.67900000000012
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 2
  episodes_total: 4900
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.182094544258014
          cur_lr: 5.000000000000001e-05
          entropy: 1.0374633845355776
          entropy_coeff: 0.009999999999999998
          kl: 0.034863156390726936
          policy_loss: -0.06354210817565521
          total_loss: 0.9979825395262903
          vf_explained_var: 0.5729334354400635
          vf_loss: 1.0655508882883522
    num_agent_steps_sampled: 1450000
    num_agent_steps_trained: 1450000
    num_steps_sampled: 1450000
    num_steps_trained: 1450000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1450,39390,1450000,-29.679,-17.9,-97.6,296.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1451000
  custom_metrics: {}
  date: 2021-10-29_08-03-22
  done: false
  episode_len_mean: 301.22
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -30.122000000000124
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 3
  episodes_total: 4903
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.273141816387021
          cur_lr: 5.000000000000001e-05
          entropy: 1.4111086116896736
          entropy_coeff: 0.009999999999999998
          kl: 0.012550876592605459
          policy_loss: 0.030832630727026196
          total_loss: 1.094947975873947
          vf_explained_var: 0.09653020650148392
          vf_loss: 1.0747982536753018
    num_agent_steps_sampled: 1451000
    num_agent_steps_trained: 1451000
    num_steps_sampled: 1451000
    num_steps_trained: 1451000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1451,39410.7,1451000,-30.122,-17.9,-97.6,301.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1452000
  custom_metrics: {}
  date: 2021-10-29_08-03-44
  done: false
  episode_len_mean: 302.93
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -30.293000000000124
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 3
  episodes_total: 4906
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.273141816387021
          cur_lr: 5.000000000000001e-05
          entropy: 1.1444076034757826
          entropy_coeff: 0.009999999999999998
          kl: 0.01802172473816069
          policy_loss: -0.09986458669106166
          total_loss: 0.49122881525092654
          vf_explained_var: 0.45160266757011414
          vf_loss: 0.5976149937344922
    num_agent_steps_sampled: 1452000
    num_agent_steps_trained: 1452000
    num_steps_sampled: 1452000
    num_steps_trained: 1452000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1452,39432.4,1452000,-30.293,-17.9,-97.6,302.93


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1453000
  custom_metrics: {}
  date: 2021-10-29_08-04-06
  done: false
  episode_len_mean: 305.17
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -30.517000000000134
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 2
  episodes_total: 4908
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.273141816387021
          cur_lr: 5.000000000000001e-05
          entropy: 1.139224703444375
          entropy_coeff: 0.009999999999999998
          kl: 0.02423741126337586
          policy_loss: -0.09891016036272049
          total_loss: 0.7830175959401661
          vf_explained_var: 0.6144255995750427
          vf_loss: 0.8866997675763236
    num_agent_steps_sampled: 1453000
    num_agent_steps_trained: 1453000
    num_steps_sampled: 1453000
    num_steps_trained: 1453000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1453,39455,1453000,-30.517,-17.9,-97.6,305.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1454000
  custom_metrics: {}
  date: 2021-10-29_08-04-29
  done: false
  episode_len_mean: 308.05
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -30.80500000000013
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 3
  episodes_total: 4911
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4097127245805315
          cur_lr: 5.000000000000001e-05
          entropy: 1.0824054559071858
          entropy_coeff: 0.009999999999999998
          kl: 0.01797753508982538
          policy_loss: -0.05649086551533805
          total_loss: 0.9613916284508175
          vf_explained_var: 0.5565999746322632
          vf_loss: 1.0213409254948298
    num_agent_steps_sampled: 1454000
    num_agent_steps_trained: 1454000
    num_steps_sampled: 1454000
    num_steps_trained: 1454000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1454,39477.1,1454000,-30.805,-17.9,-97.6,308.05


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1455000
  custom_metrics: {}
  date: 2021-10-29_08-04-52
  done: false
  episode_len_mean: 311.32
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.13200000000014
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 3
  episodes_total: 4914
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4097127245805315
          cur_lr: 5.000000000000001e-05
          entropy: 1.2559094336297778
          entropy_coeff: 0.009999999999999998
          kl: 0.011610419701574375
          policy_loss: 0.028072287804550594
          total_loss: 1.1244602849086125
          vf_explained_var: 0.027186671271920204
          vf_loss: 1.1041901575194464
    num_agent_steps_sampled: 1455000
    num_agent_steps_trained: 1455000
    num_steps_sampled: 1455000
    num_steps_trained: 1455000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1455,39501,1455000,-31.132,-17.9,-97.6,311.32


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1456000
  custom_metrics: {}
  date: 2021-10-29_08-05-17
  done: false
  episode_len_mean: 313.67
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.367000000000143
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 3
  episodes_total: 4917
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4097127245805315
          cur_lr: 5.000000000000001e-05
          entropy: 0.8782609754138523
          entropy_coeff: 0.009999999999999998
          kl: 0.007819909841391533
          policy_loss: -0.014830723653237026
          total_loss: 0.8082227339347203
          vf_explained_var: 0.5250480771064758
          vf_loss: 0.8286321590344111
    num_agent_steps_sampled: 1456000
    num_agent_steps_trained: 1456000
    num_steps_sampled: 1456000
    num_steps_trained: 1456000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1456,39525.2,1456000,-31.367,-17.9,-97.6,313.67




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1457000
  custom_metrics: {}
  date: 2021-10-29_08-05-56
  done: false
  episode_len_mean: 315.4
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.54000000000015
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 3
  episodes_total: 4920
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4097127245805315
          cur_lr: 5.000000000000001e-05
          entropy: 0.9557673950990041
          entropy_coeff: 0.009999999999999998
          kl: 0.011633294005914034
          policy_loss: -0.15964064970612526
          total_loss: 0.6123468872573641
          vf_explained_var: 0.6358072757720947
          vf_loss: 0.7767789016167322
    num_agent_steps_sampled: 1457000
    num_agent_steps_trained: 1457000
    num_steps_sampled: 1457000
    num_steps_trained: 1457000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1457,39565,1457000,-31.54,-17.9,-97.6,315.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1458000
  custom_metrics: {}
  date: 2021-10-29_08-06-21
  done: false
  episode_len_mean: 316.97
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.697000000000152
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 3
  episodes_total: 4923
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4097127245805315
          cur_lr: 5.000000000000001e-05
          entropy: 0.8318707737657759
          entropy_coeff: 0.009999999999999998
          kl: 0.006392343339187525
          policy_loss: -0.053529408077398935
          total_loss: 1.1252262857225206
          vf_explained_var: 0.26103073358535767
          vf_loss: 1.1844553814993963
    num_agent_steps_sampled: 1458000
    num_agent_steps_trained: 1458000
    num_steps_sampled: 1458000
    num_steps_trained: 1458000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1458,39589.8,1458000,-31.697,-17.9,-97.6,316.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1459000
  custom_metrics: {}
  date: 2021-10-29_08-06-46
  done: false
  episode_len_mean: 319.29
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.929000000000155
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 4
  episodes_total: 4927
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4097127245805315
          cur_lr: 5.000000000000001e-05
          entropy: 0.9846886575222016
          entropy_coeff: 0.009999999999999998
          kl: 0.008957960063069079
          policy_loss: 0.028802635934617786
          total_loss: 0.5191440542538961
          vf_explained_var: 0.7175478339195251
          vf_loss: 0.496518118513955
    num_agent_steps_sampled: 1459000
    num_agent_steps_trained: 1459000
    num_steps_sampled: 1459000
    num_steps_trained: 1459000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1459,39614.5,1459000,-31.929,-17.9,-97.6,319.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1460000
  custom_metrics: {}
  date: 2021-10-29_08-07-08
  done: false
  episode_len_mean: 322.72
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -32.272000000000155
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 2
  episodes_total: 4929
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4097127245805315
          cur_lr: 5.000000000000001e-05
          entropy: 1.2629672050476075
          entropy_coeff: 0.009999999999999998
          kl: 0.009680093244706454
          policy_loss: -0.06515981894400384
          total_loss: 0.531106170742876
          vf_explained_var: 0.1653769314289093
          vf_loss: 0.6049295913841989
    num_agent_steps_sampled: 1460000
    num_agent_steps_trained: 1460000
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1460,39636.2,1460000,-32.272,-17.9,-97.6,322.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1461000
  custom_metrics: {}
  date: 2021-10-29_08-07-33
  done: false
  episode_len_mean: 323.83
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -32.38300000000016
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 4
  episodes_total: 4933
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4097127245805315
          cur_lr: 5.000000000000001e-05
          entropy: 0.8686695224708981
          entropy_coeff: 0.009999999999999998
          kl: 0.009706587357053091
          policy_loss: 0.025158964925342135
          total_loss: 1.1659654425250159
          vf_explained_var: 0.45564642548561096
          vf_loss: 1.1455162723859151
    num_agent_steps_sampled: 1461000
    num_agent_steps_trained: 1461000
    num_steps_sampled: 1461000
    num_steps_trained: 1461000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1461,39661.9,1461000,-32.383,-17.9,-97.6,323.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1462000
  custom_metrics: {}
  date: 2021-10-29_08-08-00
  done: false
  episode_len_mean: 324.49
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -32.44900000000016
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 3
  episodes_total: 4936
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4097127245805315
          cur_lr: 5.000000000000001e-05
          entropy: 0.877119533220927
          entropy_coeff: 0.009999999999999998
          kl: 0.009520558308428543
          policy_loss: -0.08907005315025647
          total_loss: 1.031577773226632
          vf_explained_var: 0.30104854702949524
          vf_loss: 1.1255183286137052
    num_agent_steps_sampled: 1462000
    num_agent_steps_trained: 1462000
    num_steps_sampled: 1462000
    num_steps_trained: 1462000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1462,39688.4,1462000,-32.449,-17.9,-97.6,324.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1463000
  custom_metrics: {}
  date: 2021-10-29_08-08-25
  done: false
  episode_len_mean: 324.97
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -32.49700000000016
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 4
  episodes_total: 4940
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4097127245805315
          cur_lr: 5.000000000000001e-05
          entropy: 0.9338512294822269
          entropy_coeff: 0.009999999999999998
          kl: 0.006692738941217474
          policy_loss: -0.03925849836733606
          total_loss: 1.119829491774241
          vf_explained_var: 0.32334470748901367
          vf_loss: 1.165684394372834
    num_agent_steps_sampled: 1463000
    num_agent_steps_trained: 1463000
    num_steps_sampled: 1463000
    num_steps_trained: 1463000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1463,39713.5,1463000,-32.497,-17.9,-97.6,324.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1464000
  custom_metrics: {}
  date: 2021-10-29_08-08-53
  done: false
  episode_len_mean: 325.52
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -32.55200000000016
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 3
  episodes_total: 4943
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4097127245805315
          cur_lr: 5.000000000000001e-05
          entropy: 0.7991971969604492
          entropy_coeff: 0.009999999999999998
          kl: 0.007230338967773441
          policy_loss: -0.06524084077941047
          total_loss: 0.94609704249435
          vf_explained_var: 0.4061625897884369
          vf_loss: 1.0163674917485979
    num_agent_steps_sampled: 1464000
    num_agent_steps_trained: 1464000
    num_steps_sampled: 1464000
    num_steps_trained: 1464000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1464,39741.4,1464000,-32.552,-17.9,-97.6,325.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1465000
  custom_metrics: {}
  date: 2021-10-29_08-09-20
  done: false
  episode_len_mean: 322.99
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -32.299000000000156
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 4
  episodes_total: 4947
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4097127245805315
          cur_lr: 5.000000000000001e-05
          entropy: 0.8142262412442102
          entropy_coeff: 0.009999999999999998
          kl: 0.008843272104237737
          policy_loss: 0.01932577465971311
          total_loss: 1.0315023448732163
          vf_explained_var: 0.33575931191444397
          vf_loss: 1.016695621278551
    num_agent_steps_sampled: 1465000
    num_agent_steps_trained: 1465000
    num_steps_sampled: 1465000
    num_steps_trained: 1465000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1465,39768.5,1465000,-32.299,-17.9,-97.6,322.99




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1466000
  custom_metrics: {}
  date: 2021-10-29_08-10-01
  done: false
  episode_len_mean: 320.93
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -32.09300000000015
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 3
  episodes_total: 4950
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4097127245805315
          cur_lr: 5.000000000000001e-05
          entropy: 0.9542773928907182
          entropy_coeff: 0.009999999999999998
          kl: 0.004504098419027456
          policy_loss: 0.0037921784238682853
          total_loss: 0.5444256875250074
          vf_explained_var: 0.7649000287055969
          vf_loss: 0.5483308952715662
    num_agent_steps_sampled: 1466000
    num_agent_steps_trained: 1466000
    num_steps_sampled: 1466000
    num_steps_trained: 1466000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1466,39809.7,1466000,-32.093,-17.9,-97.6,320.93


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1467000
  custom_metrics: {}
  date: 2021-10-29_08-10-28
  done: false
  episode_len_mean: 319.59
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.959000000000145
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 3
  episodes_total: 4953
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20485636229026574
          cur_lr: 5.000000000000001e-05
          entropy: 0.9127614650461409
          entropy_coeff: 0.009999999999999998
          kl: 0.01511578208760527
          policy_loss: -0.12684178782833946
          total_loss: 1.329107344812817
          vf_explained_var: 0.28368425369262695
          vf_loss: 1.4619801746474372
    num_agent_steps_sampled: 1467000
    num_agent_steps_trained: 1467000
    num_steps_sampled: 1467000
    num_steps_trained: 1467000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1467,39836.6,1467000,-31.959,-17.9,-97.6,319.59


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1468000
  custom_metrics: {}
  date: 2021-10-29_08-10-54
  done: false
  episode_len_mean: 319.05
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.90500000000015
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 4
  episodes_total: 4957
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20485636229026574
          cur_lr: 5.000000000000001e-05
          entropy: 0.8550200694137149
          entropy_coeff: 0.009999999999999998
          kl: 0.011005373918374436
          policy_loss: 0.003110721872912513
          total_loss: 1.040192637178633
          vf_explained_var: 0.2937135398387909
          vf_loss: 1.0433775934908125
    num_agent_steps_sampled: 1468000
    num_agent_steps_trained: 1468000
    num_steps_sampled: 1468000
    num_steps_trained: 1468000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1468,39862.5,1468000,-31.905,-17.9,-97.6,319.05


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1469000
  custom_metrics: {}
  date: 2021-10-29_08-11-17
  done: false
  episode_len_mean: 319.45
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.945000000000153
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 2
  episodes_total: 4959
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20485636229026574
          cur_lr: 5.000000000000001e-05
          entropy: 1.2834707180658975
          entropy_coeff: 0.009999999999999998
          kl: 0.01597660166445615
          policy_loss: -0.06329937742816077
          total_loss: 1.002590959601932
          vf_explained_var: 0.22571945190429688
          vf_loss: 1.0754521373245451
    num_agent_steps_sampled: 1469000
    num_agent_steps_trained: 1469000
    num_steps_sampled: 1469000
    num_steps_trained: 1469000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1469,39884.7,1469000,-31.945,-17.9,-97.6,319.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1470000
  custom_metrics: {}
  date: 2021-10-29_08-11-39
  done: false
  episode_len_mean: 319.68
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.968000000000156
  episode_reward_min: -97.59999999999873
  episodes_this_iter: 3
  episodes_total: 4962
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20485636229026574
          cur_lr: 5.000000000000001e-05
          entropy: 1.0574121938811407
          entropy_coeff: 0.009999999999999998
          kl: 0.028458861287706187
          policy_loss: -0.08576715302964051
          total_loss: 1.0665986802842882
          vf_explained_var: 0.3879833221435547
          vf_loss: 1.157109977139367
    num_agent_steps_sampled: 1470000
    num_agent_steps_trained: 1470000
    num_steps_sampled: 1470000
    num_steps_trained: 1470000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1470,39907,1470000,-31.968,-17.9,-97.6,319.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1471000
  custom_metrics: {}
  date: 2021-10-29_08-12-02
  done: false
  episode_len_mean: 307.72
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -30.772000000000165
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 3
  episodes_total: 4965
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30728454343539857
          cur_lr: 5.000000000000001e-05
          entropy: 1.0234975775082906
          entropy_coeff: 0.009999999999999998
          kl: 0.012523062419185275
          policy_loss: -0.09603469206227197
          total_loss: 0.8799001422193315
          vf_explained_var: 0.40715596079826355
          vf_loss: 0.9823216676712037
    num_agent_steps_sampled: 1471000
    num_agent_steps_trained: 1471000
    num_steps_sampled: 1471000
    num_steps_trained: 1471000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1471,39930.2,1471000,-30.772,-17.9,-45.7,307.72


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1472000
  custom_metrics: {}
  date: 2021-10-29_08-12-27
  done: false
  episode_len_mean: 310.63
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.063000000000173
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 4
  episodes_total: 4969
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30728454343539857
          cur_lr: 5.000000000000001e-05
          entropy: 0.9026320570045048
          entropy_coeff: 0.009999999999999998
          kl: 0.013180916523904074
          policy_loss: -0.06135974534683757
          total_loss: 1.150806736946106
          vf_explained_var: 0.25216200947761536
          vf_loss: 1.217142517036862
    num_agent_steps_sampled: 1472000
    num_agent_steps_trained: 1472000
    num_steps_sampled: 1472000
    num_steps_trained: 1472000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1472,39955.1,1472000,-31.063,-17.9,-45.7,310.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1473000
  custom_metrics: {}
  date: 2021-10-29_08-12-51
  done: false
  episode_len_mean: 312.89
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.289000000000165
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 3
  episodes_total: 4972
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30728454343539857
          cur_lr: 5.000000000000001e-05
          entropy: 1.0311548272768656
          entropy_coeff: 0.009999999999999998
          kl: 0.015623331959459898
          policy_loss: 0.0396578547027376
          total_loss: 0.6329971043599977
          vf_explained_var: 0.2868882715702057
          vf_loss: 0.5988499815265338
    num_agent_steps_sampled: 1473000
    num_agent_steps_trained: 1473000
    num_steps_sampled: 1473000
    num_steps_trained: 1473000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1473,39979.4,1473000,-31.289,-17.9,-45.7,312.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1474000
  custom_metrics: {}
  date: 2021-10-29_08-13-18
  done: false
  episode_len_mean: 311.84
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.184000000000168
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 3
  episodes_total: 4975
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30728454343539857
          cur_lr: 5.000000000000001e-05
          entropy: 0.7561385479238298
          entropy_coeff: 0.009999999999999998
          kl: 0.012638020813361095
          policy_loss: -0.13305356949567795
          total_loss: 1.0228006025155385
          vf_explained_var: 0.37530940771102905
          vf_loss: 1.1595320834053888
    num_agent_steps_sampled: 1474000
    num_agent_steps_trained: 1474000
    num_steps_sampled: 1474000
    num_steps_trained: 1474000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1474,40006,1474000,-31.184,-17.9,-45.7,311.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1475000
  custom_metrics: {}
  date: 2021-10-29_08-13-47
  done: false
  episode_len_mean: 311.51
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.151000000000177
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 4
  episodes_total: 4979
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30728454343539857
          cur_lr: 5.000000000000001e-05
          entropy: 0.635510007209248
          entropy_coeff: 0.009999999999999998
          kl: 0.010682209911984823
          policy_loss: -0.02107313142882453
          total_loss: 0.6592404181758563
          vf_explained_var: 0.6683499813079834
          vf_loss: 0.6833861741754744
    num_agent_steps_sampled: 1475000
    num_agent_steps_trained: 1475000
    num_steps_sampled: 1475000
    num_steps_trained: 1475000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1475,40034.9,1475000,-31.151,-17.9,-45.7,311.51




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1476000
  custom_metrics: {}
  date: 2021-10-29_08-14-32
  done: false
  episode_len_mean: 312.21
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.221000000000178
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 4
  episodes_total: 4983
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30728454343539857
          cur_lr: 5.000000000000001e-05
          entropy: 0.7689524233341217
          entropy_coeff: 0.009999999999999998
          kl: 0.012883090109882565
          policy_loss: -0.00632932765616311
          total_loss: 0.9705643932024638
          vf_explained_var: 0.3706819713115692
          vf_loss: 0.9806244605117374
    num_agent_steps_sampled: 1476000
    num_agent_steps_trained: 1476000
    num_steps_sampled: 1476000
    num_steps_trained: 1476000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1476,40080.3,1476000,-31.221,-17.9,-45.7,312.21


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1477000
  custom_metrics: {}
  date: 2021-10-29_08-14-59
  done: false
  episode_len_mean: 312.46
  episode_media: {}
  episode_reward_max: -17.899999999999984
  episode_reward_mean: -31.246000000000173
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 3
  episodes_total: 4986
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30728454343539857
          cur_lr: 5.000000000000001e-05
          entropy: 0.7843334476153055
          entropy_coeff: 0.009999999999999998
          kl: 0.013058460661839967
          policy_loss: -0.100997680094507
          total_loss: 0.8465529640515645
          vf_explained_var: 0.44230911135673523
          vf_loss: 0.9513813164499071
    num_agent_steps_sampled: 1477000
    num_agent_steps_trained: 1477000
    num_steps_sampled: 1477000
    num_steps_trained: 1477000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1477,40107.4,1477000,-31.246,-17.9,-45.7,312.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1478000
  custom_metrics: {}
  date: 2021-10-29_08-15-27
  done: false
  episode_len_mean: 313.67
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -31.367000000000175
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 4
  episodes_total: 4990
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30728454343539857
          cur_lr: 5.000000000000001e-05
          entropy: 0.546194968952073
          entropy_coeff: 0.009999999999999998
          kl: 0.003776913533448894
          policy_loss: -0.14670919784241251
          total_loss: 1.2625041359000735
          vf_explained_var: 0.3112592101097107
          vf_loss: 1.4135147134462993
    num_agent_steps_sampled: 1478000
    num_agent_steps_trained: 1478000
    num_steps_sampled: 1478000
    num_steps_trained: 1478000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1478,40135.2,1478000,-31.367,-22.4,-45.7,313.67


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1479000
  custom_metrics: {}
  date: 2021-10-29_08-15-53
  done: false
  episode_len_mean: 312.8
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -31.280000000000182
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 4
  episodes_total: 4994
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15364227171769929
          cur_lr: 5.000000000000001e-05
          entropy: 0.9625191999806298
          entropy_coeff: 0.009999999999999998
          kl: 0.006917152806446027
          policy_loss: 0.01716049760580063
          total_loss: 1.1155275172657437
          vf_explained_var: 0.38103246688842773
          vf_loss: 1.1069294578499265
    num_agent_steps_sampled: 1479000
    num_agent_steps_trained: 1479000
    num_steps_sampled: 1479000
    num_steps_trained: 1479000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1479,40160.9,1479000,-31.28,-22.4,-44.5,312.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1480000
  custom_metrics: {}
  date: 2021-10-29_08-16-16
  done: false
  episode_len_mean: 312.36
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -31.236000000000168
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 3
  episodes_total: 4997
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15364227171769929
          cur_lr: 5.000000000000001e-05
          entropy: 1.1952928728527492
          entropy_coeff: 0.009999999999999998
          kl: 0.007065361041209586
          policy_loss: 0.05070935214559237
          total_loss: 0.9574112037817637
          vf_explained_var: 0.24897079169750214
          vf_loss: 0.9175692367884848
    num_agent_steps_sampled: 1480000
    num_agent_steps_trained: 1480000
    num_steps_sampled: 1480000
    num_steps_trained: 1480000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1480,40183.8,1480000,-31.236,-22.4,-44.5,312.36


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1481000
  custom_metrics: {}
  date: 2021-10-29_08-16-46
  done: false
  episode_len_mean: 308.8
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.88000000000016
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 4
  episodes_total: 5001
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.15364227171769929
          cur_lr: 5.000000000000001e-05
          entropy: 0.4947731256484985
          entropy_coeff: 0.009999999999999998
          kl: 0.004352779055919993
          policy_loss: 0.030778779668940437
          total_loss: 0.7614782823456658
          vf_explained_var: 0.6353632211685181
          vf_loss: 0.7349784513314565
    num_agent_steps_sampled: 1481000
    num_agent_steps_trained: 1481000
    num_steps_sampled: 1481000
    num_steps_trained: 1481000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1481,40213.7,1481000,-30.88,-22.4,-44.5,308.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1482000
  custom_metrics: {}
  date: 2021-10-29_08-17-11
  done: false
  episode_len_mean: 306.22
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.622000000000163
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 3
  episodes_total: 5004
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.07682113585884964
          cur_lr: 5.000000000000001e-05
          entropy: 1.0234920223553976
          entropy_coeff: 0.009999999999999998
          kl: 0.024890998222911295
          policy_loss: 0.07036328953173425
          total_loss: 0.9081494513485167
          vf_explained_var: 0.16058135032653809
          vf_loss: 0.8461089242663649
    num_agent_steps_sampled: 1482000
    num_agent_steps_trained: 1482000
    num_steps_sampled: 1482000
    num_steps_trained: 1482000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1482,40238.6,1482000,-30.622,-22.4,-44.5,306.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1483000
  custom_metrics: {}
  date: 2021-10-29_08-17-33
  done: false
  episode_len_mean: 304.72
  episode_media: {}
  episode_reward_max: -22.40000000000005
  episode_reward_mean: -30.47200000000016
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 3
  episodes_total: 5007
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1152317037882745
          cur_lr: 5.000000000000001e-05
          entropy: 1.2148567895094553
          entropy_coeff: 0.009999999999999998
          kl: 0.014179685388983464
          policy_loss: 0.12191483063830269
          total_loss: 0.4404521107673645
          vf_explained_var: 0.6726638674736023
          vf_loss: 0.32905189957883624
    num_agent_steps_sampled: 1483000
    num_agent_steps_trained: 1483000
    num_steps_sampled: 1483000
    num_steps_trained: 1483000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1483,40261.3,1483000,-30.472,-22.4,-44.5,304.72




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1484000
  custom_metrics: {}
  date: 2021-10-29_08-18-22
  done: false
  episode_len_mean: 300.71
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -30.071000000000158
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 4
  episodes_total: 5011
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1152317037882745
          cur_lr: 5.000000000000001e-05
          entropy: 0.8368732002046373
          entropy_coeff: 0.009999999999999998
          kl: 0.02398338882601231
          policy_loss: -0.00026437855429119536
          total_loss: 0.7349912653366725
          vf_explained_var: 0.6169048547744751
          vf_loss: 0.7408607340521283
    num_agent_steps_sampled: 1484000
    num_agent_steps_trained: 1484000
    num_steps_sampled: 1484000
    num_steps_trained: 1484000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1484,40310.4,1484000,-30.071,-21.8,-44.5,300.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1485000
  custom_metrics: {}
  date: 2021-10-29_08-18-45
  done: false
  episode_len_mean: 298.82
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.882000000000154
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 3
  episodes_total: 5014
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17284755568241178
          cur_lr: 5.000000000000001e-05
          entropy: 1.1927072346210479
          entropy_coeff: 0.009999999999999998
          kl: 0.019310071071425246
          policy_loss: -0.11986695379018783
          total_loss: 0.6115500993198819
          vf_explained_var: 0.6259623765945435
          vf_loss: 0.7400064253144794
    num_agent_steps_sampled: 1485000
    num_agent_steps_trained: 1485000
    num_steps_sampled: 1485000
    num_steps_trained: 1485000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1485,40333.2,1485000,-29.882,-21.8,-44.5,298.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1486000
  custom_metrics: {}
  date: 2021-10-29_08-19-09
  done: false
  episode_len_mean: 299.91
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.99100000000016
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 3
  episodes_total: 5017
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17284755568241178
          cur_lr: 5.000000000000001e-05
          entropy: 1.4007145788934496
          entropy_coeff: 0.009999999999999998
          kl: 0.01512305527102187
          policy_loss: -0.058201584882206385
          total_loss: 0.8745948957072364
          vf_explained_var: 0.3649110198020935
          vf_loss: 0.9441896362437142
    num_agent_steps_sampled: 1486000
    num_agent_steps_trained: 1486000
    num_steps_sampled: 1486000
    num_steps_trained: 1486000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1486,40357.1,1486000,-29.991,-21.8,-44.5,299.91


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1487000
  custom_metrics: {}
  date: 2021-10-29_08-19-35
  done: false
  episode_len_mean: 298.06
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.806000000000154
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 4
  episodes_total: 5021
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17284755568241178
          cur_lr: 5.000000000000001e-05
          entropy: 1.1120844529734717
          entropy_coeff: 0.009999999999999998
          kl: 0.023470019584047225
          policy_loss: -0.08870718719230758
          total_loss: 0.9191771477460862
          vf_explained_var: 0.5044775605201721
          vf_loss: 1.0149484352933036
    num_agent_steps_sampled: 1487000
    num_agent_steps_trained: 1487000
    num_steps_sampled: 1487000
    num_steps_trained: 1487000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1487,40382.8,1487000,-29.806,-21.8,-44.5,298.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1488000
  custom_metrics: {}
  date: 2021-10-29_08-20-03
  done: false
  episode_len_mean: 297.24
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.72400000000015
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 3
  episodes_total: 5024
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.25927133352361764
          cur_lr: 5.000000000000001e-05
          entropy: 1.0439468132125007
          entropy_coeff: 0.009999999999999998
          kl: 0.022041389189992974
          policy_loss: -0.10344951351483662
          total_loss: 0.7121815135909452
          vf_explained_var: 0.4805804491043091
          vf_loss: 0.8203558054235246
    num_agent_steps_sampled: 1488000
    num_agent_steps_trained: 1488000
    num_steps_sampled: 1488000
    num_steps_trained: 1488000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1488,40410.8,1488000,-29.724,-21.8,-44.5,297.24


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1489000
  custom_metrics: {}
  date: 2021-10-29_08-20-26
  done: false
  episode_len_mean: 296.11
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.61100000000015
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 4
  episodes_total: 5028
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3889070002854265
          cur_lr: 5.000000000000001e-05
          entropy: 1.19088953004943
          entropy_coeff: 0.009999999999999998
          kl: 0.01745537105651398
          policy_loss: 0.057469821472962694
          total_loss: 0.738504531317287
          vf_explained_var: 0.6463715434074402
          vf_loss: 0.6861550824509727
    num_agent_steps_sampled: 1489000
    num_agent_steps_trained: 1489000
    num_steps_sampled: 1489000
    num_steps_trained: 1489000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1489,40434,1489000,-29.611,-21.8,-44.5,296.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1490000
  custom_metrics: {}
  date: 2021-10-29_08-20-54
  done: false
  episode_len_mean: 294.33
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.43300000000015
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 3
  episodes_total: 5031
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3889070002854265
          cur_lr: 5.000000000000001e-05
          entropy: 1.0041364808877309
          entropy_coeff: 0.009999999999999998
          kl: 0.020532268520417396
          policy_loss: -0.06982136915127436
          total_loss: 0.419277546968725
          vf_explained_var: 0.1536426991224289
          vf_loss: 0.4911551417575942
    num_agent_steps_sampled: 1490000
    num_agent_steps_trained: 1490000
    num_steps_sampled: 1490000
    num_steps_trained: 1490000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1490,40461.8,1490000,-29.433,-21.8,-44.5,294.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1491000
  custom_metrics: {}
  date: 2021-10-29_08-21-21
  done: false
  episode_len_mean: 294.23
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.423000000000147
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 4
  episodes_total: 5035
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5833605004281398
          cur_lr: 5.000000000000001e-05
          entropy: 1.1730899002816941
          entropy_coeff: 0.009999999999999998
          kl: 0.009309050996112154
          policy_loss: 0.03190649209751023
          total_loss: 0.6948141826523675
          vf_explained_var: 0.7472610473632812
          vf_loss: 0.6692080583837298
    num_agent_steps_sampled: 1491000
    num_agent_steps_trained: 1491000
    num_steps_sampled: 1491000
    num_steps_trained: 1491000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1491,40488.4,1491000,-29.423,-21.8,-44.5,294.23


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1492000
  custom_metrics: {}
  date: 2021-10-29_08-21-46
  done: false
  episode_len_mean: 294.82
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.482000000000152
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 3
  episodes_total: 5038
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5833605004281398
          cur_lr: 5.000000000000001e-05
          entropy: 1.2943040947119395
          entropy_coeff: 0.009999999999999998
          kl: 0.024553695961339234
          policy_loss: 0.03902184930112627
          total_loss: 0.6167479142857094
          vf_explained_var: 0.7620264887809753
          vf_loss: 0.5763454475336605
    num_agent_steps_sampled: 1492000
    num_agent_steps_trained: 1492000
    num_steps_sampled: 1492000
    num_steps_trained: 1492000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1492,40513.9,1492000,-29.482,-21.8,-44.5,294.82




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1493000
  custom_metrics: {}
  date: 2021-10-29_08-22-30
  done: false
  episode_len_mean: 294.13
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.413000000000146
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 4
  episodes_total: 5042
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8750407506422099
          cur_lr: 5.000000000000001e-05
          entropy: 1.1650963286558786
          entropy_coeff: 0.009999999999999998
          kl: 0.00953623848361676
          policy_loss: -0.007160456023282475
          total_loss: 1.0255201439062753
          vf_explained_var: 0.48495641350746155
          vf_loss: 1.0359869725174373
    num_agent_steps_sampled: 1493000
    num_agent_steps_trained: 1493000
    num_steps_sampled: 1493000
    num_steps_trained: 1493000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1493,40557.4,1493000,-29.413,-21.8,-44.5,294.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1494000
  custom_metrics: {}
  date: 2021-10-29_08-22-55
  done: false
  episode_len_mean: 294.82
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.48200000000015
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 3
  episodes_total: 5045
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8750407506422099
          cur_lr: 5.000000000000001e-05
          entropy: 1.2909500002861023
          entropy_coeff: 0.009999999999999998
          kl: 0.010049606231778165
          policy_loss: 0.09756471713383992
          total_loss: 0.7115184846851561
          vf_explained_var: 0.6041948199272156
          vf_loss: 0.6180694470802943
    num_agent_steps_sampled: 1494000
    num_agent_steps_trained: 1494000
    num_steps_sampled: 1494000
    num_steps_trained: 1494000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1494,40582.6,1494000,-29.482,-21.8,-44.5,294.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1495000
  custom_metrics: {}
  date: 2021-10-29_08-23-21
  done: false
  episode_len_mean: 294.65
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.46500000000015
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 4
  episodes_total: 5049
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8750407506422099
          cur_lr: 5.000000000000001e-05
          entropy: 1.00734294851621
          entropy_coeff: 0.009999999999999998
          kl: 0.009403441923573692
          policy_loss: 0.044308736754788296
          total_loss: 0.9512340088685354
          vf_explained_var: 0.5345140695571899
          vf_loss: 0.9087703039248785
    num_agent_steps_sampled: 1495000
    num_agent_steps_trained: 1495000
    num_steps_sampled: 1495000
    num_steps_trained: 1495000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1495,40608.7,1495000,-29.465,-21.8,-44.5,294.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1496000
  custom_metrics: {}
  date: 2021-10-29_08-23-46
  done: false
  episode_len_mean: 294.15
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.415000000000155
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 3
  episodes_total: 5052
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8750407506422099
          cur_lr: 5.000000000000001e-05
          entropy: 1.0177049265967475
          entropy_coeff: 0.009999999999999998
          kl: 0.0057620209111155065
          policy_loss: 0.07641139378150304
          total_loss: 0.7098639249801636
          vf_explained_var: 0.4456418752670288
          vf_loss: 0.6385875773098734
    num_agent_steps_sampled: 1496000
    num_agent_steps_trained: 1496000
    num_steps_sampled: 1496000
    num_steps_trained: 1496000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1496,40633.6,1496000,-29.415,-21.8,-44.5,294.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1497000
  custom_metrics: {}
  date: 2021-10-29_08-24-07
  done: false
  episode_len_mean: 296.05
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.60500000000015
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 3
  episodes_total: 5055
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8750407506422099
          cur_lr: 5.000000000000001e-05
          entropy: 1.532152341471778
          entropy_coeff: 0.009999999999999998
          kl: 0.007660267309257998
          policy_loss: 0.027992378175258636
          total_loss: 1.168073046207428
          vf_explained_var: 0.27490314841270447
          vf_loss: 1.1486991559465727
    num_agent_steps_sampled: 1497000
    num_agent_steps_trained: 1497000
    num_steps_sampled: 1497000
    num_steps_trained: 1497000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1497,40654.7,1497000,-29.605,-21.8,-44.5,296.05


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1498000
  custom_metrics: {}
  date: 2021-10-29_08-24-31
  done: false
  episode_len_mean: 295.63
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.56300000000015
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 5058
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8750407506422099
          cur_lr: 5.000000000000001e-05
          entropy: 1.267144238286548
          entropy_coeff: 0.009999999999999998
          kl: 0.009740898677292789
          policy_loss: 0.12478631974922286
          total_loss: 0.6858053541845746
          vf_explained_var: 0.60167396068573
          vf_loss: 0.5651667911145423
    num_agent_steps_sampled: 1498000
    num_agent_steps_trained: 1498000
    num_steps_sampled: 1498000
    num_steps_trained: 1498000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1498,40679,1498000,-29.563,-21.8,-41.8,295.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1499000
  custom_metrics: {}
  date: 2021-10-29_08-24-55
  done: false
  episode_len_mean: 295.62
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.562000000000147
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 5061
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8750407506422099
          cur_lr: 5.000000000000001e-05
          entropy: 1.431812008221944
          entropy_coeff: 0.009999999999999998
          kl: 0.008679046956914032
          policy_loss: 0.07852587542600102
          total_loss: 1.0117025481330024
          vf_explained_var: 0.35444384813308716
          vf_loss: 0.9399002651373546
    num_agent_steps_sampled: 1499000
    num_agent_steps_trained: 1499000
    num_steps_sampled: 1499000
    num_steps_trained: 1499000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1499,40702.5,1499000,-29.562,-21.8,-41.8,295.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1500000
  custom_metrics: {}
  date: 2021-10-29_08-25-16
  done: false
  episode_len_mean: 295.68
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.56800000000015
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 5064
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8750407506422099
          cur_lr: 5.000000000000001e-05
          entropy: 1.5506884098052978
          entropy_coeff: 0.009999999999999998
          kl: 0.014707715209765186
          policy_loss: 0.05016457231508361
          total_loss: 1.0381378259923724
          vf_explained_var: 0.2985469102859497
          vf_loss: 0.9906102996733454
    num_agent_steps_sampled: 1500000
    num_agent_steps_trained: 1500000
    num_steps_sampled: 1500000
    num_steps_trained: 1500000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1500,40723.9,1500000,-29.568,-21.8,-41.8,295.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1501000
  custom_metrics: {}
  date: 2021-10-29_08-25-39
  done: false
  episode_len_mean: 295.76
  episode_media: {}
  episode_reward_max: -21.80000000000004
  episode_reward_mean: -29.57600000000015
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 2
  episodes_total: 5066
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8750407506422099
          cur_lr: 5.000000000000001e-05
          entropy: 1.3528458582030403
          entropy_coeff: 0.009999999999999998
          kl: 0.012193934878918494
          policy_loss: -0.07762167503436407
          total_loss: 0.6977538115448422
          vf_explained_var: 0.018674761056900024
          vf_loss: 0.778233747680982
    num_agent_steps_sampled: 1501000
    num_agent_steps_trained: 1501000
    num_steps_sampled: 1501000
    num_steps_trained: 1501000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1501,40746.1,1501000,-29.576,-21.8,-41.8,295.76




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1502000
  custom_metrics: {}
  date: 2021-10-29_08-26-23
  done: false
  episode_len_mean: 294.54
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -29.454000000000146
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 5070
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8750407506422099
          cur_lr: 5.000000000000001e-05
          entropy: 1.1784458034568364
          entropy_coeff: 0.009999999999999998
          kl: 0.008872798440240616
          policy_loss: -0.007180794990724987
          total_loss: 0.857756538523568
          vf_explained_var: 0.5875781774520874
          vf_loss: 0.868957730796602
    num_agent_steps_sampled: 1502000
    num_agent_steps_trained: 1502000
    num_steps_sampled: 1502000
    num_steps_trained: 1502000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1502,40790.1,1502000,-29.454,-21.7,-41.8,294.54


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1503000
  custom_metrics: {}
  date: 2021-10-29_08-26-48
  done: false
  episode_len_mean: 295.39
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -29.539000000000147
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 5073
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8750407506422099
          cur_lr: 5.000000000000001e-05
          entropy: 1.3219547046555413
          entropy_coeff: 0.009999999999999998
          kl: 0.013950598399167942
          policy_loss: -0.07772334946526421
          total_loss: 0.7515081312921312
          vf_explained_var: 0.5510746240615845
          vf_loss: 0.8302436861726973
    num_agent_steps_sampled: 1503000
    num_agent_steps_trained: 1503000
    num_steps_sampled: 1503000
    num_steps_trained: 1503000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1503,40815.6,1503000,-29.539,-21.7,-41.8,295.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1504000
  custom_metrics: {}
  date: 2021-10-29_08-27-13
  done: false
  episode_len_mean: 296.11
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -29.611000000000153
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 5077
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8750407506422099
          cur_lr: 5.000000000000001e-05
          entropy: 1.3857386761241488
          entropy_coeff: 0.009999999999999998
          kl: 0.021878132181088084
          policy_loss: 0.03529145999087228
          total_loss: 0.9109153406487571
          vf_explained_var: 0.5237827897071838
          vf_loss: 0.8703370051251518
    num_agent_steps_sampled: 1504000
    num_agent_steps_trained: 1504000
    num_steps_sampled: 1504000
    num_steps_trained: 1504000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1504,40840,1504000,-29.611,-21.7,-41.8,296.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1505000
  custom_metrics: {}
  date: 2021-10-29_08-27-38
  done: false
  episode_len_mean: 297.63
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -29.76300000000016
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 5080
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3125611259633145
          cur_lr: 5.000000000000001e-05
          entropy: 1.111687406566408
          entropy_coeff: 0.009999999999999998
          kl: 0.0038247173481488674
          policy_loss: 0.018905570026901033
          total_loss: 0.43142271290222806
          vf_explained_var: 0.6941098570823669
          vf_loss: 0.41861383732822205
    num_agent_steps_sampled: 1505000
    num_agent_steps_trained: 1505000
    num_steps_sampled: 1505000
    num_steps_trained: 1505000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1505,40865.7,1505000,-29.763,-21.7,-41.8,297.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1506000
  custom_metrics: {}
  date: 2021-10-29_08-28-03
  done: false
  episode_len_mean: 297.83
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -29.783000000000158
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 5083
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6562805629816573
          cur_lr: 5.000000000000001e-05
          entropy: 1.3615371916029189
          entropy_coeff: 0.009999999999999998
          kl: 0.01584641596113651
          policy_loss: -0.13184841275215148
          total_loss: 1.0165007528331544
          vf_explained_var: 0.46158602833747864
          vf_loss: 1.151564837164349
    num_agent_steps_sampled: 1506000
    num_agent_steps_trained: 1506000
    num_steps_sampled: 1506000
    num_steps_trained: 1506000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1506,40890.9,1506000,-29.783,-21.7,-41.8,297.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1507000
  custom_metrics: {}
  date: 2021-10-29_08-28-29
  done: false
  episode_len_mean: 298.82
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -29.882000000000154
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 5087
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6562805629816573
          cur_lr: 5.000000000000001e-05
          entropy: 1.3089528282483418
          entropy_coeff: 0.009999999999999998
          kl: 0.012478752427170904
          policy_loss: -0.013889733122454749
          total_loss: 0.7454848153723611
          vf_explained_var: 0.5949963927268982
          vf_loss: 0.7642745183573829
    num_agent_steps_sampled: 1507000
    num_agent_steps_trained: 1507000
    num_steps_sampled: 1507000
    num_steps_trained: 1507000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1507,40916.5,1507000,-29.882,-21.7,-41.8,298.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1508000
  custom_metrics: {}
  date: 2021-10-29_08-28-55
  done: false
  episode_len_mean: 299.79
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -29.979000000000152
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 5090
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6562805629816573
          cur_lr: 5.000000000000001e-05
          entropy: 1.100999445385403
          entropy_coeff: 0.009999999999999998
          kl: 0.00935882355813986
          policy_loss: -0.13272425913148456
          total_loss: 0.7940531586607297
          vf_explained_var: 0.4225074350833893
          vf_loss: 0.9316453870799807
    num_agent_steps_sampled: 1508000
    num_agent_steps_trained: 1508000
    num_steps_sampled: 1508000
    num_steps_trained: 1508000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1508,40942.1,1508000,-29.979,-21.7,-41.8,299.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1509000
  custom_metrics: {}
  date: 2021-10-29_08-29-18
  done: false
  episode_len_mean: 299.95
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -29.99500000000016
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 5093
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6562805629816573
          cur_lr: 5.000000000000001e-05
          entropy: 1.1876083135604858
          entropy_coeff: 0.009999999999999998
          kl: 0.013617687919601511
          policy_loss: -0.12221046313643455
          total_loss: 0.4956616009275118
          vf_explained_var: 0.7409667372703552
          vf_loss: 0.6208111253049639
    num_agent_steps_sampled: 1509000
    num_agent_steps_trained: 1509000
    num_steps_sampled: 1509000
    num_steps_trained: 1509000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1509,40965.7,1509000,-29.995,-21.7,-41.8,299.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1510000
  custom_metrics: {}
  date: 2021-10-29_08-29-43
  done: false
  episode_len_mean: 300.0
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -30.000000000000156
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 5097
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6562805629816573
          cur_lr: 5.000000000000001e-05
          entropy: 1.295848090118832
          entropy_coeff: 0.009999999999999998
          kl: 0.016780072518338903
          policy_loss: -0.14499818798568515
          total_loss: 1.312998902797699
          vf_explained_var: 0.20929169654846191
          vf_loss: 1.4599431375662486
    num_agent_steps_sampled: 1510000
    num_agent_steps_trained: 1510000
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1510,40990.5,1510000,-30,-21.7,-41.8,300




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1511000
  custom_metrics: {}
  date: 2021-10-29_08-30-23
  done: false
  episode_len_mean: 302.2
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -30.220000000000162
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 5100
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6562805629816573
          cur_lr: 5.000000000000001e-05
          entropy: 1.2471887363327874
          entropy_coeff: 0.009999999999999998
          kl: 0.006573653476716067
          policy_loss: -0.049709407488505046
          total_loss: 0.6250389632251527
          vf_explained_var: 0.18739917874336243
          vf_loss: 0.6829060996572177
    num_agent_steps_sampled: 1511000
    num_agent_steps_trained: 1511000
    num_steps_sampled: 1511000
    num_steps_trained: 1511000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1511,41030.5,1511000,-30.22,-21.7,-41.8,302.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1512000
  custom_metrics: {}
  date: 2021-10-29_08-30-50
  done: false
  episode_len_mean: 302.56
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -30.256000000000157
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 5103
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6562805629816573
          cur_lr: 5.000000000000001e-05
          entropy: 1.1275966584682464
          entropy_coeff: 0.009999999999999998
          kl: 0.011427750094173758
          policy_loss: 0.06110668033361435
          total_loss: 0.5026971889866723
          vf_explained_var: 0.8299857378005981
          vf_loss: 0.4453666639824708
    num_agent_steps_sampled: 1512000
    num_agent_steps_trained: 1512000
    num_steps_sampled: 1512000
    num_steps_trained: 1512000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1512,41057.6,1512000,-30.256,-21.7,-41.8,302.56


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1513000
  custom_metrics: {}
  date: 2021-10-29_08-31-10
  done: false
  episode_len_mean: 304.64
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -30.464000000000166
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 5106
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6562805629816573
          cur_lr: 5.000000000000001e-05
          entropy: 1.5029808031188117
          entropy_coeff: 0.009999999999999998
          kl: 0.010349824930319409
          policy_loss: 0.0616541373439961
          total_loss: 0.7132477750380833
          vf_explained_var: 0.40120333433151245
          vf_loss: 0.6598310609037678
    num_agent_steps_sampled: 1513000
    num_agent_steps_trained: 1513000
    num_steps_sampled: 1513000
    num_steps_trained: 1513000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1513,41076.9,1513000,-30.464,-21.7,-41.8,304.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1514000
  custom_metrics: {}
  date: 2021-10-29_08-31-32
  done: false
  episode_len_mean: 306.26
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -30.62600000000016
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 5109
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6562805629816573
          cur_lr: 5.000000000000001e-05
          entropy: 1.4249933494461908
          entropy_coeff: 0.009999999999999998
          kl: 0.016804810935210548
          policy_loss: 0.032200104577673805
          total_loss: 0.8770223478476207
          vf_explained_var: 0.4110049307346344
          vf_loss: 0.8480434954166413
    num_agent_steps_sampled: 1514000
    num_agent_steps_trained: 1514000
    num_steps_sampled: 1514000
    num_steps_trained: 1514000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1514,41099.5,1514000,-30.626,-21.7,-41.8,306.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1515000
  custom_metrics: {}
  date: 2021-10-29_08-31-53
  done: false
  episode_len_mean: 309.19
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -30.919000000000167
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 2
  episodes_total: 5111
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6562805629816573
          cur_lr: 5.000000000000001e-05
          entropy: 1.4206945525275336
          entropy_coeff: 0.009999999999999998
          kl: 0.011825977783194119
          policy_loss: -0.05660993456840515
          total_loss: 1.0047950751251644
          vf_explained_var: 0.27489447593688965
          vf_loss: 1.067850797623396
    num_agent_steps_sampled: 1515000
    num_agent_steps_trained: 1515000
    num_steps_sampled: 1515000
    num_steps_trained: 1515000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1515,41120.5,1515000,-30.919,-21.7,-41.8,309.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1516000
  custom_metrics: {}
  date: 2021-10-29_08-32-16
  done: false
  episode_len_mean: 309.91
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -30.99100000000017
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 5114
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6562805629816573
          cur_lr: 5.000000000000001e-05
          entropy: 1.3359371913803948
          entropy_coeff: 0.009999999999999998
          kl: 0.013046431710815416
          policy_loss: -0.11822510874933667
          total_loss: 1.292661683095826
          vf_explained_var: 0.3671473264694214
          vf_loss: 1.4156840354204179
    num_agent_steps_sampled: 1516000
    num_agent_steps_trained: 1516000
    num_steps_sampled: 1516000
    num_steps_trained: 1516000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1516,41143.6,1516000,-30.991,-21.7,-41.8,309.91


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1517000
  custom_metrics: {}
  date: 2021-10-29_08-32-41
  done: false
  episode_len_mean: 308.76
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -30.876000000000175
  episode_reward_min: -40.3000000000003
  episodes_this_iter: 3
  episodes_total: 5117
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6562805629816573
          cur_lr: 5.000000000000001e-05
          entropy: 1.1932464665836757
          entropy_coeff: 0.009999999999999998
          kl: 0.010942239997810748
          policy_loss: -0.17326658897929723
          total_loss: 1.1252262135346731
          vf_explained_var: 0.4080039858818054
          vf_loss: 1.3032440953784519
    num_agent_steps_sampled: 1517000
    num_agent_steps_trained: 1517000
    num_steps_sampled: 1517000
    num_steps_trained: 1517000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1517,41168.6,1517000,-30.876,-21.7,-40.3,308.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1518000
  custom_metrics: {}
  date: 2021-10-29_08-33-03
  done: false
  episode_len_mean: 310.87
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.087000000000174
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 3
  episodes_total: 5120
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6562805629816573
          cur_lr: 5.000000000000001e-05
          entropy: 1.238623019721773
          entropy_coeff: 0.009999999999999998
          kl: 0.010146728062075757
          policy_loss: -0.1191145107563999
          total_loss: 0.9786729719903734
          vf_explained_var: 0.5034381151199341
          vf_loss: 1.1035146183437772
    num_agent_steps_sampled: 1518000
    num_agent_steps_trained: 1518000
    num_steps_sampled: 1518000
    num_steps_trained: 1518000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1518,41190.6,1518000,-31.087,-21.7,-43.5,310.87


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1519000
  custom_metrics: {}
  date: 2021-10-29_08-33-24
  done: false
  episode_len_mean: 313.46
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.346000000000178
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 3
  episodes_total: 5123
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6562805629816573
          cur_lr: 5.000000000000001e-05
          entropy: 1.4164702627393935
          entropy_coeff: 0.009999999999999998
          kl: 0.02402622326921043
          policy_loss: 0.09586977751718627
          total_loss: 0.8382179097996818
          vf_explained_var: 0.5138418078422546
          vf_loss: 0.7407448914315965
    num_agent_steps_sampled: 1519000
    num_agent_steps_trained: 1519000
    num_steps_sampled: 1519000
    num_steps_trained: 1519000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1519,41211.5,1519000,-31.346,-21.7,-43.5,313.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1520000
  custom_metrics: {}
  date: 2021-10-29_08-33-51
  done: false
  episode_len_mean: 314.94
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.494000000000177
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 3
  episodes_total: 5126
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9844208444724855
          cur_lr: 5.000000000000001e-05
          entropy: 1.2030020912488302
          entropy_coeff: 0.009999999999999998
          kl: 0.007796814833698009
          policy_loss: -0.07155281255642573
          total_loss: 0.6670663540561994
          vf_explained_var: 0.6524811387062073
          vf_loss: 0.7429738443758752
    num_agent_steps_sampled: 1520000
    num_agent_steps_trained: 1520000
    num_steps_sampled: 1520000
    num_steps_trained: 1520000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1520,41237.6,1520000,-31.494,-21.7,-43.5,314.94




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1521000
  custom_metrics: {}
  date: 2021-10-29_08-34-29
  done: false
  episode_len_mean: 314.2
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.420000000000183
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 4
  episodes_total: 5130
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9844208444724855
          cur_lr: 5.000000000000001e-05
          entropy: 1.2034720427460142
          entropy_coeff: 0.009999999999999998
          kl: 0.010258505886635936
          policy_loss: 0.044235875374741024
          total_loss: 1.0590806633234024
          vf_explained_var: 0.391769140958786
          vf_loss: 1.0167808269460996
    num_agent_steps_sampled: 1521000
    num_agent_steps_trained: 1521000
    num_steps_sampled: 1521000
    num_steps_trained: 1521000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1521,41276.3,1521000,-31.42,-21.7,-43.5,314.2


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1522000
  custom_metrics: {}
  date: 2021-10-29_08-34-52
  done: false
  episode_len_mean: 316.13
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.61300000000018
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 2
  episodes_total: 5132
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9844208444724855
          cur_lr: 5.000000000000001e-05
          entropy: 1.221807885169983
          entropy_coeff: 0.009999999999999998
          kl: 0.012408734831140104
          policy_loss: -0.08392384664879905
          total_loss: 0.6921252471705278
          vf_explained_var: 0.4587475657463074
          vf_loss: 0.776051754421658
    num_agent_steps_sampled: 1522000
    num_agent_steps_trained: 1522000
    num_steps_sampled: 1522000
    num_steps_trained: 1522000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1522,41299.6,1522000,-31.613,-21.7,-43.5,316.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1523000
  custom_metrics: {}
  date: 2021-10-29_08-35-18
  done: false
  episode_len_mean: 317.62
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.762000000000175
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 4
  episodes_total: 5136
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9844208444724855
          cur_lr: 5.000000000000001e-05
          entropy: 1.1632597870296901
          entropy_coeff: 0.009999999999999998
          kl: 0.007445187746314118
          policy_loss: -0.07843457361062368
          total_loss: 0.9814705640077591
          vf_explained_var: 0.49547237157821655
          vf_loss: 1.0642085323731105
    num_agent_steps_sampled: 1523000
    num_agent_steps_trained: 1523000
    num_steps_sampled: 1523000
    num_steps_trained: 1523000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1523,41324.7,1523000,-31.762,-21.7,-43.5,317.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1524000
  custom_metrics: {}
  date: 2021-10-29_08-35-41
  done: false
  episode_len_mean: 317.98
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.798000000000183
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 3
  episodes_total: 5139
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9844208444724855
          cur_lr: 5.000000000000001e-05
          entropy: 1.1543518914116753
          entropy_coeff: 0.009999999999999998
          kl: 0.006006345251552148
          policy_loss: 0.02925050730506579
          total_loss: 0.8646153645382987
          vf_explained_var: 0.5283982753753662
          vf_loss: 0.8409956086633934
    num_agent_steps_sampled: 1524000
    num_agent_steps_trained: 1524000
    num_steps_sampled: 1524000
    num_steps_trained: 1524000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1524,41348,1524000,-31.798,-21.7,-43.5,317.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1525000
  custom_metrics: {}
  date: 2021-10-29_08-36-06
  done: false
  episode_len_mean: 317.58
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.758000000000173
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 3
  episodes_total: 5142
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9844208444724855
          cur_lr: 5.000000000000001e-05
          entropy: 0.9549527664979299
          entropy_coeff: 0.009999999999999998
          kl: 0.004641309733682597
          policy_loss: -0.09535903814766142
          total_loss: 1.3621793389320374
          vf_explained_var: 0.15080754458904266
          vf_loss: 1.462518909242418
    num_agent_steps_sampled: 1525000
    num_agent_steps_trained: 1525000
    num_steps_sampled: 1525000
    num_steps_trained: 1525000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1525,41373.1,1525000,-31.758,-21.7,-43.5,317.58


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1526000
  custom_metrics: {}
  date: 2021-10-29_08-36-30
  done: false
  episode_len_mean: 318.05
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.80500000000018
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 3
  episodes_total: 5145
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.49221042223624273
          cur_lr: 5.000000000000001e-05
          entropy: 1.1214597139093612
          entropy_coeff: 0.009999999999999998
          kl: 0.019396002191757983
          policy_loss: -0.14135566221343146
          total_loss: 0.7752707779407502
          vf_explained_var: 0.6661480665206909
          vf_loss: 0.9182941238085429
    num_agent_steps_sampled: 1526000
    num_agent_steps_trained: 1526000
    num_steps_sampled: 1526000
    num_steps_trained: 1526000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1526,41396.7,1526000,-31.805,-21.7,-43.5,318.05


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1527000
  custom_metrics: {}
  date: 2021-10-29_08-36-54
  done: false
  episode_len_mean: 320.64
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -32.06400000000018
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 3
  episodes_total: 5148
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.49221042223624273
          cur_lr: 5.000000000000001e-05
          entropy: 1.1165054712030622
          entropy_coeff: 0.009999999999999998
          kl: 0.01008784541854229
          policy_loss: -0.10709021588166555
          total_loss: 0.7762715190649032
          vf_explained_var: 0.44566938281059265
          vf_loss: 0.8895614392227597
    num_agent_steps_sampled: 1527000
    num_agent_steps_trained: 1527000
    num_steps_sampled: 1527000
    num_steps_trained: 1527000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1527,41420.5,1527000,-32.064,-21.7,-43.5,320.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1528000
  custom_metrics: {}
  date: 2021-10-29_08-37-20
  done: false
  episode_len_mean: 319.86
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.98600000000019
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 4
  episodes_total: 5152
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.49221042223624273
          cur_lr: 5.000000000000001e-05
          entropy: 0.9847948107454512
          entropy_coeff: 0.009999999999999998
          kl: 0.008466289535824798
          policy_loss: 0.005263897197114097
          total_loss: 0.824471014075809
          vf_explained_var: 0.6080280542373657
          vf_loss: 0.8248878796895345
    num_agent_steps_sampled: 1528000
    num_agent_steps_trained: 1528000
    num_steps_sampled: 1528000
    num_steps_trained: 1528000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1528,41447.3,1528000,-31.986,-21.7,-43.5,319.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1529000
  custom_metrics: {}
  date: 2021-10-29_08-37-42
  done: false
  episode_len_mean: 319.03
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.903000000000187
  episode_reward_min: -43.50000000000035
  episodes_this_iter: 3
  episodes_total: 5155
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.49221042223624273
          cur_lr: 5.000000000000001e-05
          entropy: 1.0214832246303558
          entropy_coeff: 0.009999999999999998
          kl: 0.008737245040654922
          policy_loss: 0.05352222952577803
          total_loss: 0.7514574597279231
          vf_explained_var: 0.5163416862487793
          vf_loss: 0.7038495052191946
    num_agent_steps_sampled: 1529000
    num_agent_steps_trained: 1529000
    num_steps_sampled: 1529000
    num_steps_trained: 1529000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1529,41468.7,1529000,-31.903,-21.7,-43.5,319.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1530000
  custom_metrics: {}
  date: 2021-10-29_08-38-07
  done: false
  episode_len_mean: 319.4
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.940000000000182
  episode_reward_min: -43.70000000000035
  episodes_this_iter: 3
  episodes_total: 5158
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.49221042223624273
          cur_lr: 5.000000000000001e-05
          entropy: 1.2315618620978461
          entropy_coeff: 0.009999999999999998
          kl: 0.02308793770584623
          policy_loss: 0.1319629525144895
          total_loss: 0.7275517768330044
          vf_explained_var: 0.665422797203064
          vf_loss: 0.5965403114755948
    num_agent_steps_sampled: 1530000
    num_agent_steps_trained: 1530000
    num_steps_sampled: 1530000
    num_steps_trained: 1530000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1530,41493.8,1530000,-31.94,-21.7,-43.7,319.4




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1531000
  custom_metrics: {}
  date: 2021-10-29_08-38-51
  done: false
  episode_len_mean: 317.76
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.77600000000018
  episode_reward_min: -43.70000000000035
  episodes_this_iter: 4
  episodes_total: 5162
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.1962757137086657
          entropy_coeff: 0.009999999999999998
          kl: 0.012180970756177857
          policy_loss: -0.09931565423806508
          total_loss: 0.796978470020824
          vf_explained_var: 0.672359824180603
          vf_loss: 0.8992634793122609
    num_agent_steps_sampled: 1531000
    num_agent_steps_trained: 1531000
    num_steps_sampled: 1531000
    num_steps_trained: 1531000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1531,41538,1531000,-31.776,-21.7,-43.7,317.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1532000
  custom_metrics: {}
  date: 2021-10-29_08-39-17
  done: false
  episode_len_mean: 314.89
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.489000000000182
  episode_reward_min: -43.70000000000035
  episodes_this_iter: 3
  episodes_total: 5165
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.0892180091804928
          entropy_coeff: 0.009999999999999998
          kl: 0.014611178947650908
          policy_loss: 0.09771159605847465
          total_loss: 0.5460845683390896
          vf_explained_var: 0.5686806440353394
          vf_loss: 0.4484774919019805
    num_agent_steps_sampled: 1532000
    num_agent_steps_trained: 1532000
    num_steps_sampled: 1532000
    num_steps_trained: 1532000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1532,41564,1532000,-31.489,-21.7,-43.7,314.89


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1533000
  custom_metrics: {}
  date: 2021-10-29_08-39-41
  done: false
  episode_len_mean: 314.84
  episode_media: {}
  episode_reward_max: -21.70000000000004
  episode_reward_mean: -31.484000000000172
  episode_reward_min: -43.70000000000035
  episodes_this_iter: 3
  episodes_total: 5168
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.2963751196861266
          entropy_coeff: 0.009999999999999998
          kl: 0.010004914077591347
          policy_loss: -0.02972833721174134
          total_loss: 0.5362255164318614
          vf_explained_var: 0.602814257144928
          vf_loss: 0.5715308234095573
    num_agent_steps_sampled: 1533000
    num_agent_steps_trained: 1533000
    num_steps_sampled: 1533000
    num_steps_trained: 1533000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1533,41587.8,1533000,-31.484,-21.7,-43.7,314.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1534000
  custom_metrics: {}
  date: 2021-10-29_08-40-04
  done: false
  episode_len_mean: 317.11
  episode_media: {}
  episode_reward_max: -24.100000000000072
  episode_reward_mean: -31.71100000000018
  episode_reward_min: -43.70000000000035
  episodes_this_iter: 3
  episodes_total: 5171
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 0.9214843412240347
          entropy_coeff: 0.009999999999999998
          kl: 0.00764948454640553
          policy_loss: -0.07276610202259487
          total_loss: 1.2717828035354615
          vf_explained_var: 0.21724970638751984
          vf_loss: 1.3481159918838077
    num_agent_steps_sampled: 1534000
    num_agent_steps_trained: 1534000
    num_steps_sampled: 1534000
    num_steps_trained: 1534000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1534,41610.4,1534000,-31.711,-24.1,-43.7,317.11


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1535000
  custom_metrics: {}
  date: 2021-10-29_08-40-28
  done: false
  episode_len_mean: 315.98
  episode_media: {}
  episode_reward_max: -24.100000000000072
  episode_reward_mean: -31.59800000000018
  episode_reward_min: -43.70000000000035
  episodes_this_iter: 4
  episodes_total: 5175
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 0.8642658717102475
          entropy_coeff: 0.009999999999999998
          kl: 0.006357393639329148
          policy_loss: 0.04861390772793028
          total_loss: 1.0126038624180689
          vf_explained_var: 0.43653929233551025
          vf_loss: 0.9679388483365376
    num_agent_steps_sampled: 1535000
    num_agent_steps_trained: 1535000
    num_steps_sampled: 1535000
    num_steps_trained: 1535000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1535,41635.2,1535000,-31.598,-24.1,-43.7,315.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1536000
  custom_metrics: {}
  date: 2021-10-29_08-40-56
  done: false
  episode_len_mean: 315.87
  episode_media: {}
  episode_reward_max: -24.100000000000072
  episode_reward_mean: -31.587000000000174
  episode_reward_min: -43.70000000000035
  episodes_this_iter: 3
  episodes_total: 5178
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 0.802698987060123
          entropy_coeff: 0.009999999999999998
          kl: 0.011616501253297903
          policy_loss: -0.04243723899126053
          total_loss: 0.8049462825059891
          vf_explained_var: 0.19965332746505737
          vf_loss: 0.8468338681591882
    num_agent_steps_sampled: 1536000
    num_agent_steps_trained: 1536000
    num_steps_sampled: 1536000
    num_steps_trained: 1536000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1536,41662.4,1536000,-31.587,-24.1,-43.7,315.87


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1537000
  custom_metrics: {}
  date: 2021-10-29_08-41-19
  done: false
  episode_len_mean: 316.56
  episode_media: {}
  episode_reward_max: -24.100000000000072
  episode_reward_mean: -31.65600000000018
  episode_reward_min: -43.70000000000035
  episodes_this_iter: 3
  episodes_total: 5181
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.148929390642378
          entropy_coeff: 0.009999999999999998
          kl: 0.00791499446274404
          policy_loss: -0.015463759005069733
          total_loss: 0.6592680207557149
          vf_explained_var: 0.6449800133705139
          vf_loss: 0.6803773171371884
    num_agent_steps_sampled: 1537000
    num_agent_steps_trained: 1537000
    num_steps_sampled: 1537000
    num_steps_trained: 1537000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1537,41685.8,1537000,-31.656,-24.1,-43.7,316.56


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1538000
  custom_metrics: {}
  date: 2021-10-29_08-41-44
  done: false
  episode_len_mean: 316.1
  episode_media: {}
  episode_reward_max: -24.100000000000072
  episode_reward_mean: -31.610000000000177
  episode_reward_min: -43.70000000000035
  episodes_this_iter: 4
  episodes_total: 5185
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.0488120085663266
          entropy_coeff: 0.009999999999999998
          kl: 0.01612415042128638
          policy_loss: -0.022366989817884233
          total_loss: 0.926331521736251
          vf_explained_var: 0.4254879057407379
          vf_loss: 0.9472819172673755
    num_agent_steps_sampled: 1538000
    num_agent_steps_trained: 1538000
    num_steps_sampled: 1538000
    num_steps_trained: 1538000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1538,41710.5,1538000,-31.61,-24.1,-43.7,316.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1539000
  custom_metrics: {}
  date: 2021-10-29_08-42-07
  done: false
  episode_len_mean: 317.98
  episode_media: {}
  episode_reward_max: -24.100000000000072
  episode_reward_mean: -31.79800000000018
  episode_reward_min: -43.70000000000035
  episodes_this_iter: 3
  episodes_total: 5188
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 0.9621642463737063
          entropy_coeff: 0.009999999999999998
          kl: 0.013532000836927556
          policy_loss: 0.038599108821815914
          total_loss: 1.1456069231033326
          vf_explained_var: -0.13972966372966766
          vf_loss: 1.1066385660320521
    num_agent_steps_sampled: 1539000
    num_agent_steps_trained: 1539000
    num_steps_sampled: 1539000
    num_steps_trained: 1539000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1539,41733.7,1539000,-31.798,-24.1,-43.7,317.98




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1540000
  custom_metrics: {}
  date: 2021-10-29_08-42-49
  done: false
  episode_len_mean: 317.95
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.795000000000183
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 3
  episodes_total: 5191
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.100786066055298
          entropy_coeff: 0.009999999999999998
          kl: 0.009974203030344084
          policy_loss: 0.10101470152537027
          total_loss: 0.6880582422018051
          vf_explained_var: 0.5967426896095276
          vf_loss: 0.5906872967051136
    num_agent_steps_sampled: 1540000
    num_agent_steps_trained: 1540000
    num_steps_sampled: 1540000
    num_steps_trained: 1540000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1540,41775.2,1540000,-31.795,-21.6,-45.1,317.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1541000
  custom_metrics: {}
  date: 2021-10-29_08-43-14
  done: false
  episode_len_mean: 317.97
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.797000000000182
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 3
  episodes_total: 5194
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.1087879359722137
          entropy_coeff: 0.009999999999999998
          kl: 0.009099804202590791
          policy_loss: 0.0707920049627622
          total_loss: 1.205658115280999
          vf_explained_var: -0.07472454011440277
          vf_loss: 1.1392354574468402
    num_agent_steps_sampled: 1541000
    num_agent_steps_trained: 1541000
    num_steps_sampled: 1541000
    num_steps_trained: 1541000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1541,41800.9,1541000,-31.797,-21.6,-45.1,317.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1542000
  custom_metrics: {}
  date: 2021-10-29_08-43-40
  done: false
  episode_len_mean: 316.83
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.683000000000185
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 4
  episodes_total: 5198
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 0.9279421753353543
          entropy_coeff: 0.009999999999999998
          kl: 0.005224021199704352
          policy_loss: -0.0006144057545397017
          total_loss: 0.9645803365442488
          vf_explained_var: 0.47093379497528076
          vf_loss: 0.970617194308175
    num_agent_steps_sampled: 1542000
    num_agent_steps_trained: 1542000
    num_steps_sampled: 1542000
    num_steps_trained: 154200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1542,41827.1,1542000,-31.683,-21.6,-45.1,316.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1543000
  custom_metrics: {}
  date: 2021-10-29_08-44-04
  done: false
  episode_len_mean: 316.62
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.66200000000018
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 3
  episodes_total: 5201
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.1978952818446689
          entropy_coeff: 0.009999999999999998
          kl: 0.010460626394041345
          policy_loss: -0.012977982229656643
          total_loss: 0.9946670995818244
          vf_explained_var: 0.32242918014526367
          vf_loss: 1.0119007940093676
    num_agent_steps_sampled: 1543000
    num_agent_steps_trained: 1543000
    num_steps_sampled: 1543000
    num_steps_trained: 1543000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1543,41850.9,1543000,-31.662,-21.6,-45.1,316.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1544000
  custom_metrics: {}
  date: 2021-10-29_08-44-31
  done: false
  episode_len_mean: 316.37
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.63700000000018
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 3
  episodes_total: 5204
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.0234680818186865
          entropy_coeff: 0.009999999999999998
          kl: 0.009256087250790775
          policy_loss: 0.015231046163373523
          total_loss: 0.9289076921012667
          vf_explained_var: 0.1903536468744278
          vf_loss: 0.9170774069097307
    num_agent_steps_sampled: 1544000
    num_agent_steps_trained: 1544000
    num_steps_sampled: 1544000
    num_steps_trained: 1544000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1544,41877.9,1544000,-31.637,-21.6,-45.1,316.37


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1545000
  custom_metrics: {}
  date: 2021-10-29_08-44-53
  done: false
  episode_len_mean: 313.75
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.375000000000178
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 3
  episodes_total: 5207
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 0.8136953254540761
          entropy_coeff: 0.009999999999999998
          kl: 0.011525817851074887
          policy_loss: -0.07929406878021028
          total_loss: 1.0624635689788395
          vf_explained_var: 0.5141552686691284
          vf_loss: 1.1413848989539677
    num_agent_steps_sampled: 1545000
    num_agent_steps_trained: 1545000
    num_steps_sampled: 1545000
    num_steps_trained: 1545000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1545,41900,1545000,-31.375,-21.6,-45.1,313.75


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1546000
  custom_metrics: {}
  date: 2021-10-29_08-45-13
  done: false
  episode_len_mean: 315.64
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.564000000000178
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5210
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.5797335651185778
          entropy_coeff: 0.009999999999999998
          kl: 0.00640988766383275
          policy_loss: 0.010957128389014139
          total_loss: 1.4257201847102907
          vf_explained_var: -0.32420533895492554
          vf_loss: 1.425827893945906
    num_agent_steps_sampled: 1546000
    num_agent_steps_trained: 1546000
    num_steps_sampled: 1546000
    num_steps_trained: 1546000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1546,41919.5,1546000,-31.564,-21.6,-51.3,315.64


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1547000
  custom_metrics: {}
  date: 2021-10-29_08-45-35
  done: false
  episode_len_mean: 314.85
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.485000000000174
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5213
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.3786175141731898
          entropy_coeff: 0.009999999999999998
          kl: 0.013397427783467227
          policy_loss: 0.14364165655440755
          total_loss: 0.6674212023615838
          vf_explained_var: 0.2101449817419052
          vf_loss: 0.527674189582467
    num_agent_steps_sampled: 1547000
    num_agent_steps_trained: 1547000
    num_steps_sampled: 1547000
    num_steps_trained: 1547000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1547,41941.8,1547000,-31.485,-21.6,-51.3,314.85


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1548000
  custom_metrics: {}
  date: 2021-10-29_08-46-00
  done: false
  episode_len_mean: 315.44
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.544000000000178
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5216
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.4047457284397549
          entropy_coeff: 0.009999999999999998
          kl: 0.014302911853069134
          policy_loss: 0.055507521828015646
          total_loss: 0.6823501946197615
          vf_explained_var: 0.1546458750963211
          vf_loss: 0.6303300663001008
    num_agent_steps_sampled: 1548000
    num_agent_steps_trained: 1548000
    num_steps_sampled: 1548000
    num_steps_trained: 1548000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1548,41966.3,1548000,-31.544,-21.6,-51.3,315.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1549000
  custom_metrics: {}
  date: 2021-10-29_08-46-25
  done: false
  episode_len_mean: 312.54
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.254000000000175
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5219
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.0970012227694192
          entropy_coeff: 0.009999999999999998
          kl: 0.011807494204716208
          policy_loss: -0.06554289385676385
          total_loss: 1.2139100376102658
          vf_explained_var: 0.3137342035770416
          vf_loss: 1.2817052880922952
    num_agent_steps_sampled: 1549000
    num_agent_steps_trained: 1549000
    num_steps_sampled: 1549000
    num_steps_trained: 1549000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1549,41991,1549000,-31.254,-21.6,-51.3,312.54




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1550000
  custom_metrics: {}
  date: 2021-10-29_08-47-03
  done: false
  episode_len_mean: 314.62
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.462000000000184
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5222
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.606780387295617
          entropy_coeff: 0.009999999999999998
          kl: 0.010826467206769132
          policy_loss: -0.020130283633867898
          total_loss: 1.4680599106682672
          vf_explained_var: -0.22974540293216705
          vf_loss: 1.4962646416491932
    num_agent_steps_sampled: 1550000
    num_agent_steps_trained: 1550000
    num_steps_sampled: 1550000
    num_steps_trained: 155000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1550,42029,1550000,-31.462,-21.6,-51.3,314.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1551000
  custom_metrics: {}
  date: 2021-10-29_08-47-25
  done: false
  episode_len_mean: 315.53
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.55300000000018
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 2
  episodes_total: 5224
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.445584734280904
          entropy_coeff: 0.009999999999999998
          kl: 0.011013354456188059
          policy_loss: -0.08137793863813082
          total_loss: 0.7123781490657065
          vf_explained_var: -0.15237532556056976
          vf_loss: 0.8000806028644244
    num_agent_steps_sampled: 1551000
    num_agent_steps_trained: 1551000
    num_steps_sampled: 1551000
    num_steps_trained: 1551000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1551,42051.4,1551000,-31.553,-21.6,-51.3,315.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1552000
  custom_metrics: {}
  date: 2021-10-29_08-47-48
  done: false
  episode_len_mean: 317.1
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.710000000000186
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5227
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.495903374089135
          entropy_coeff: 0.009999999999999998
          kl: 0.018613234854804893
          policy_loss: -0.22560796627981794
          total_loss: 0.7345730422271622
          vf_explained_var: 0.09268910437822342
          vf_loss: 0.9613976059688463
    num_agent_steps_sampled: 1552000
    num_agent_steps_trained: 1552000
    num_steps_sampled: 1552000
    num_steps_trained: 1552000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1552,42074.1,1552000,-31.71,-21.6,-51.3,317.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1553000
  custom_metrics: {}
  date: 2021-10-29_08-48-10
  done: false
  episode_len_mean: 316.24
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.624000000000184
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5230
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.3388334386878544
          entropy_coeff: 0.009999999999999998
          kl: 0.019282121754907264
          policy_loss: -0.01984372306615114
          total_loss: 0.8320035613245435
          vf_explained_var: 0.6566399931907654
          vf_loss: 0.8509993255138397
    num_agent_steps_sampled: 1553000
    num_agent_steps_trained: 1553000
    num_steps_sampled: 1553000
    num_steps_trained: 1553000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1553,42095.9,1553000,-31.624,-21.6,-51.3,316.24


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1554000
  custom_metrics: {}
  date: 2021-10-29_08-48-31
  done: false
  episode_len_mean: 317.78
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.77800000000018
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5233
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.7485666818088956
          entropy_coeff: 0.009999999999999998
          kl: 0.011248958736940848
          policy_loss: 0.07680638051695293
          total_loss: 1.01544927822219
          vf_explained_var: 0.06752873957157135
          vf_loss: 0.9478232883744769
    num_agent_steps_sampled: 1554000
    num_agent_steps_trained: 1554000
    num_steps_sampled: 1554000
    num_steps_trained: 1554000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1554,42117.8,1554000,-31.778,-21.6,-51.3,317.78


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1555000
  custom_metrics: {}
  date: 2021-10-29_08-48-57
  done: false
  episode_len_mean: 316.7
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.670000000000183
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5236
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.466619414753384
          entropy_coeff: 0.009999999999999998
          kl: 0.01241047453338303
          policy_loss: 0.15319338391224543
          total_loss: 0.4107362792723709
          vf_explained_var: 0.8713700771331787
          vf_loss: 0.2630462389025423
    num_agent_steps_sampled: 1555000
    num_agent_steps_trained: 1555000
    num_steps_sampled: 1555000
    num_steps_trained: 1555000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1555,42143,1555000,-31.67,-21.6,-51.3,316.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1556000
  custom_metrics: {}
  date: 2021-10-29_08-49-24
  done: false
  episode_len_mean: 316.73
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.67300000000018
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 4
  episodes_total: 5240
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.160209412044949
          entropy_coeff: 0.009999999999999998
          kl: 0.012087484892500475
          policy_loss: 0.07143768303924136
          total_loss: 0.6603346490197711
          vf_explained_var: 0.8911460041999817
          vf_loss: 0.5915746827920277
    num_agent_steps_sampled: 1556000
    num_agent_steps_trained: 1556000
    num_steps_sampled: 1556000
    num_steps_trained: 1556000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1556,42170.1,1556000,-31.673,-21.6,-51.3,316.73


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1557000
  custom_metrics: {}
  date: 2021-10-29_08-49-42
  done: false
  episode_len_mean: 319.26
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.926000000000187
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 2
  episodes_total: 5242
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.483597207069397
          entropy_coeff: 0.009999999999999998
          kl: 0.010396836755208937
          policy_loss: -0.07231649466686779
          total_loss: 0.6946644107500712
          vf_explained_var: 0.6781581044197083
          vf_loss: 0.7741407128671806
    num_agent_steps_sampled: 1557000
    num_agent_steps_trained: 1557000
    num_steps_sampled: 1557000
    num_steps_trained: 1557000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1557,42187.9,1557000,-31.926,-21.6,-51.3,319.26


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1558000
  custom_metrics: {}
  date: 2021-10-29_08-50-02
  done: false
  episode_len_mean: 322.12
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -32.21200000000018
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5245
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.6140497790442572
          entropy_coeff: 0.009999999999999998
          kl: 0.007357674961895258
          policy_loss: 0.014280526671144697
          total_loss: 0.7463975164625379
          vf_explained_var: 0.5200760960578918
          vf_loss: 0.7428251945310169
    num_agent_steps_sampled: 1558000
    num_agent_steps_trained: 1558000
    num_steps_sampled: 1558000
    num_steps_trained: 1558000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1558,42208.7,1558000,-32.212,-21.6,-51.3,322.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1559000
  custom_metrics: {}
  date: 2021-10-29_08-50-28
  done: false
  episode_len_mean: 319.44
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.944000000000184
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5248
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.3282954931259154
          entropy_coeff: 0.009999999999999998
          kl: 0.0143372395487704
          policy_loss: -0.06479568435913986
          total_loss: 0.39107262736393344
          vf_explained_var: 0.8671906590461731
          vf_loss: 0.45856585568851893
    num_agent_steps_sampled: 1559000
    num_agent_steps_trained: 1559000
    num_steps_sampled: 1559000
    num_steps_trained: 1559000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1559,42234.6,1559000,-31.944,-21.6,-51.3,319.44




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1560000
  custom_metrics: {}
  date: 2021-10-29_08-51-12
  done: false
  episode_len_mean: 321.4
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -32.140000000000185
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 4
  episodes_total: 5252
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7383156333543643
          cur_lr: 5.000000000000001e-05
          entropy: 1.0545168651474848
          entropy_coeff: 0.009999999999999998
          kl: 0.004527288191792343
          policy_loss: -0.11308636797799004
          total_loss: 1.0214441153738234
          vf_explained_var: 0.5164917707443237
          vf_loss: 1.141733095380995
    num_agent_steps_sampled: 1560000
    num_agent_steps_trained: 1560000
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1560,42277.8,1560000,-32.14,-21.6,-51.3,321.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1561000
  custom_metrics: {}
  date: 2021-10-29_08-51-38
  done: false
  episode_len_mean: 319.05
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.905000000000186
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5255
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.36915781667718217
          cur_lr: 5.000000000000001e-05
          entropy: 1.043108993437555
          entropy_coeff: 0.009999999999999998
          kl: 0.011360090893326142
          policy_loss: -0.046601526190837225
          total_loss: 0.6199646562337875
          vf_explained_var: 0.595978319644928
          vf_loss: 0.6728036125500997
    num_agent_steps_sampled: 1561000
    num_agent_steps_trained: 1561000
    num_steps_sampled: 1561000
    num_steps_trained: 1561000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1561,42304.4,1561000,-31.905,-21.6,-51.3,319.05


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1562000
  custom_metrics: {}
  date: 2021-10-29_08-52-03
  done: false
  episode_len_mean: 318.35
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.835000000000186
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5258
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.36915781667718217
          cur_lr: 5.000000000000001e-05
          entropy: 0.9376410146554311
          entropy_coeff: 0.009999999999999998
          kl: 0.011033261912648721
          policy_loss: -0.15782740381028917
          total_loss: 0.6928413831525378
          vf_explained_var: 0.6214969158172607
          vf_loss: 0.8559721807638804
    num_agent_steps_sampled: 1562000
    num_agent_steps_trained: 1562000
    num_steps_sampled: 1562000
    num_steps_trained: 1562000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1562,42329.3,1562000,-31.835,-21.6,-51.3,318.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1563000
  custom_metrics: {}
  date: 2021-10-29_08-52-31
  done: false
  episode_len_mean: 318.91
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.891000000000187
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 4
  episodes_total: 5262
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.36915781667718217
          cur_lr: 5.000000000000001e-05
          entropy: 1.3369200653500026
          entropy_coeff: 0.009999999999999998
          kl: 0.02455218319221591
          policy_loss: -0.03579197819862101
          total_loss: 0.5564167294237349
          vf_explained_var: 0.7056151628494263
          vf_loss: 0.5965142703718609
    num_agent_steps_sampled: 1563000
    num_agent_steps_trained: 1563000
    num_steps_sampled: 1563000
    num_steps_trained: 1563000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1563,42357.1,1563000,-31.891,-21.6,-51.3,318.91


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1564000
  custom_metrics: {}
  date: 2021-10-29_08-52-59
  done: false
  episode_len_mean: 317.51
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.751000000000186
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 4
  episodes_total: 5266
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5537367250157733
          cur_lr: 5.000000000000001e-05
          entropy: 0.8261100338564978
          entropy_coeff: 0.009999999999999998
          kl: 0.008838884662499583
          policy_loss: -0.0030181729131274755
          total_loss: 0.3400995819932885
          vf_explained_var: 0.8476903438568115
          vf_loss: 0.3464844384127193
    num_agent_steps_sampled: 1564000
    num_agent_steps_trained: 1564000
    num_steps_sampled: 1564000
    num_steps_trained: 156400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1564,42385.4,1564000,-31.751,-21.6,-51.3,317.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1565000
  custom_metrics: {}
  date: 2021-10-29_08-53-25
  done: false
  episode_len_mean: 315.76
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.576000000000178
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5269
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5537367250157733
          cur_lr: 5.000000000000001e-05
          entropy: 1.6490855044788784
          entropy_coeff: 0.009999999999999998
          kl: 0.00867782750236409
          policy_loss: 0.0030222765273518032
          total_loss: 0.5405033349990844
          vf_explained_var: 0.6062033176422119
          vf_loss: 0.549166684349378
    num_agent_steps_sampled: 1565000
    num_agent_steps_trained: 1565000
    num_steps_sampled: 1565000
    num_steps_trained: 1565000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1565,42411.6,1565000,-31.576,-21.6,-51.3,315.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1566000
  custom_metrics: {}
  date: 2021-10-29_08-53-51
  done: false
  episode_len_mean: 315.44
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.54400000000018
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 4
  episodes_total: 5273
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5537367250157733
          cur_lr: 5.000000000000001e-05
          entropy: 1.5191538254419963
          entropy_coeff: 0.009999999999999998
          kl: 0.006668442030112863
          policy_loss: 0.042203237592346135
          total_loss: 0.7112599624527826
          vf_explained_var: 0.4929982125759125
          vf_loss: 0.6805556986067031
    num_agent_steps_sampled: 1566000
    num_agent_steps_trained: 1566000
    num_steps_sampled: 1566000
    num_steps_trained: 1566000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1566,42437.2,1566000,-31.544,-21.6,-51.3,315.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1567000
  custom_metrics: {}
  date: 2021-10-29_08-54-15
  done: false
  episode_len_mean: 315.17
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.517000000000177
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5276
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5537367250157733
          cur_lr: 5.000000000000001e-05
          entropy: 1.30144342051612
          entropy_coeff: 0.009999999999999998
          kl: 0.01528568827318624
          policy_loss: -0.0018614472614394294
          total_loss: 0.7279909713400735
          vf_explained_var: 0.287849098443985
          vf_loss: 0.734402600924174
    num_agent_steps_sampled: 1567000
    num_agent_steps_trained: 1567000
    num_steps_sampled: 1567000
    num_steps_trained: 1567000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1567,42461.4,1567000,-31.517,-21.6,-51.3,315.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1568000
  custom_metrics: {}
  date: 2021-10-29_08-54-39
  done: false
  episode_len_mean: 316.52
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.65200000000018
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5279
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5537367250157733
          cur_lr: 5.000000000000001e-05
          entropy: 1.1343788497977787
          entropy_coeff: 0.009999999999999998
          kl: 0.006477661981867142
          policy_loss: -0.047067828393644755
          total_loss: 0.8607774158318837
          vf_explained_var: 0.46491244435310364
          vf_loss: 0.9156021025445726
    num_agent_steps_sampled: 1568000
    num_agent_steps_trained: 1568000
    num_steps_sampled: 1568000
    num_steps_trained: 1568000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1568,42485.5,1568000,-31.652,-21.6,-51.3,316.52




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1569000
  custom_metrics: {}
  date: 2021-10-29_08-55-20
  done: false
  episode_len_mean: 316.88
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.688000000000184
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5282
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5537367250157733
          cur_lr: 5.000000000000001e-05
          entropy: 1.224604340394338
          entropy_coeff: 0.009999999999999998
          kl: 0.0138337970575049
          policy_loss: -0.0934816524386406
          total_loss: 0.6422161322500971
          vf_explained_var: 0.44091829657554626
          vf_loss: 0.7402835418780644
    num_agent_steps_sampled: 1569000
    num_agent_steps_trained: 1569000
    num_steps_sampled: 1569000
    num_steps_trained: 1569000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1569,42525.6,1569000,-31.688,-21.6,-51.3,316.88


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1570000
  custom_metrics: {}
  date: 2021-10-29_08-55-43
  done: false
  episode_len_mean: 316.79
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.679000000000183
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 4
  episodes_total: 5286
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5537367250157733
          cur_lr: 5.000000000000001e-05
          entropy: 1.1448984152740902
          entropy_coeff: 0.009999999999999998
          kl: 0.011748742588546987
          policy_loss: 0.0004435613751411438
          total_loss: 0.5215803871552149
          vf_explained_var: 0.7847139835357666
          vf_loss: 0.5260800878206889
    num_agent_steps_sampled: 1570000
    num_agent_steps_trained: 1570000
    num_steps_sampled: 1570000
    num_steps_trained: 1570000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1570,42548.8,1570000,-31.679,-21.6,-51.3,316.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1571000
  custom_metrics: {}
  date: 2021-10-29_08-56-01
  done: false
  episode_len_mean: 319.17
  episode_media: {}
  episode_reward_max: -21.600000000000037
  episode_reward_mean: -31.917000000000186
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 2
  episodes_total: 5288
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5537367250157733
          cur_lr: 5.000000000000001e-05
          entropy: 1.653438036971622
          entropy_coeff: 0.009999999999999998
          kl: 0.011517848554648364
          policy_loss: 0.13719163412849109
          total_loss: 0.46608104275332557
          vf_explained_var: 0.5485664010047913
          vf_loss: 0.339045932640632
    num_agent_steps_sampled: 1571000
    num_agent_steps_trained: 1571000
    num_steps_sampled: 1571000
    num_steps_trained: 1571000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1571,42566.8,1571000,-31.917,-21.6,-51.3,319.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1572000
  custom_metrics: {}
  date: 2021-10-29_08-56-27
  done: false
  episode_len_mean: 318.31
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -31.83100000000018
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5291
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5537367250157733
          cur_lr: 5.000000000000001e-05
          entropy: 0.9160687757862939
          entropy_coeff: 0.009999999999999998
          kl: 0.004181021779710371
          policy_loss: -0.16305845843421088
          total_loss: 0.35987562470965917
          vf_explained_var: 0.7288874387741089
          vf_loss: 0.5297795875204934
    num_agent_steps_sampled: 1572000
    num_agent_steps_trained: 1572000
    num_steps_sampled: 1572000
    num_steps_trained: 1572000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1572,42593.4,1572000,-31.831,-22,-51.3,318.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1573000
  custom_metrics: {}
  date: 2021-10-29_08-56-54
  done: false
  episode_len_mean: 317.29
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -31.729000000000177
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 4
  episodes_total: 5295
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27686836250788666
          cur_lr: 5.000000000000001e-05
          entropy: 0.8898552146222857
          entropy_coeff: 0.009999999999999998
          kl: 0.016883990338989817
          policy_loss: 0.0041884622226158776
          total_loss: 0.5239217364125781
          vf_explained_var: 0.7445443868637085
          vf_loss: 0.5239571786589092
    num_agent_steps_sampled: 1573000
    num_agent_steps_trained: 1573000
    num_steps_sampled: 1573000
    num_steps_trained: 157300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1573,42620.2,1573000,-31.729,-22,-51.3,317.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1574000
  custom_metrics: {}
  date: 2021-10-29_08-57-21
  done: false
  episode_len_mean: 317.84
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -31.784000000000184
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5298
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27686836250788666
          cur_lr: 5.000000000000001e-05
          entropy: 0.9640441377957661
          entropy_coeff: 0.009999999999999998
          kl: 0.0068608940626611395
          policy_loss: -0.04850790591703521
          total_loss: 0.3718002667029699
          vf_explained_var: 0.7593327164649963
          vf_loss: 0.4280490491125319
    num_agent_steps_sampled: 1574000
    num_agent_steps_trained: 1574000
    num_steps_sampled: 1574000
    num_steps_trained: 157400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1574,42646.5,1574000,-31.784,-22,-51.3,317.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1575000
  custom_metrics: {}
  date: 2021-10-29_08-57-47
  done: false
  episode_len_mean: 316.66
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -31.666000000000178
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 4
  episodes_total: 5302
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.27686836250788666
          cur_lr: 5.000000000000001e-05
          entropy: 0.9386900537543826
          entropy_coeff: 0.009999999999999998
          kl: 0.020433622033113212
          policy_loss: 0.024784423576460945
          total_loss: 0.5221715966860453
          vf_explained_var: 0.6434555649757385
          vf_loss: 0.5011166549391217
    num_agent_steps_sampled: 1575000
    num_agent_steps_trained: 1575000
    num_steps_sampled: 1575000
    num_steps_trained: 1575000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1575,42672.8,1575000,-31.666,-22,-51.3,316.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1576000
  custom_metrics: {}
  date: 2021-10-29_08-58-10
  done: false
  episode_len_mean: 317.86
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -31.786000000000183
  episode_reward_min: -51.30000000000046
  episodes_this_iter: 3
  episodes_total: 5305
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41530254376182996
          cur_lr: 5.000000000000001e-05
          entropy: 0.8596789081891377
          entropy_coeff: 0.009999999999999998
          kl: 0.008581169453465284
          policy_loss: 0.09688218235969544
          total_loss: 0.5516261902120378
          vf_explained_var: 0.7240738868713379
          vf_loss: 0.45977700915601516
    num_agent_steps_sampled: 1576000
    num_agent_steps_trained: 1576000
    num_steps_sampled: 1576000
    num_steps_trained: 1576000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1576,42695.6,1576000,-31.786,-22,-51.3,317.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1577000
  custom_metrics: {}
  date: 2021-10-29_08-58-38
  done: false
  episode_len_mean: 313.18
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -31.31800000000017
  episode_reward_min: -51.100000000000456
  episodes_this_iter: 4
  episodes_total: 5309
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41530254376182996
          cur_lr: 5.000000000000001e-05
          entropy: 0.6471783101558686
          entropy_coeff: 0.009999999999999998
          kl: 0.005601379731332182
          policy_loss: -0.14282147900925743
          total_loss: 0.5318750124838617
          vf_explained_var: 0.6062420606613159
          vf_loss: 0.6788419998354382
    num_agent_steps_sampled: 1577000
    num_agent_steps_trained: 1577000
    num_steps_sampled: 1577000
    num_steps_trained: 1577000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1577,42723.7,1577000,-31.318,-22,-51.1,313.18




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1578000
  custom_metrics: {}
  date: 2021-10-29_08-59-19
  done: false
  episode_len_mean: 313.7
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -31.37000000000017
  episode_reward_min: -51.100000000000456
  episodes_this_iter: 3
  episodes_total: 5312
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41530254376182996
          cur_lr: 5.000000000000001e-05
          entropy: 0.9416898330052693
          entropy_coeff: 0.009999999999999998
          kl: 0.008809121118555276
          policy_loss: -0.02328184826506509
          total_loss: 0.4758307491739591
          vf_explained_var: 0.19035544991493225
          vf_loss: 0.5048710438112418
    num_agent_steps_sampled: 1578000
    num_agent_steps_trained: 1578000
    num_steps_sampled: 1578000
    num_steps_trained: 1578000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1578,42765.3,1578000,-31.37,-22,-51.1,313.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1579000
  custom_metrics: {}
  date: 2021-10-29_08-59-47
  done: false
  episode_len_mean: 310.61
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -31.06100000000017
  episode_reward_min: -51.100000000000456
  episodes_this_iter: 4
  episodes_total: 5316
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41530254376182996
          cur_lr: 5.000000000000001e-05
          entropy: 0.5927087853352229
          entropy_coeff: 0.009999999999999998
          kl: 0.013556814155228301
          policy_loss: -0.016316028518809214
          total_loss: 0.788171562883589
          vf_explained_var: 0.5830504298210144
          vf_loss: 0.8047844790750079
    num_agent_steps_sampled: 1579000
    num_agent_steps_trained: 1579000
    num_steps_sampled: 1579000
    num_steps_trained: 1579000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1579,42792.5,1579000,-31.061,-22,-51.1,310.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1580000
  custom_metrics: {}
  date: 2021-10-29_09-00-18
  done: false
  episode_len_mean: 307.16
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -30.716000000000168
  episode_reward_min: -51.100000000000456
  episodes_this_iter: 4
  episodes_total: 5320
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41530254376182996
          cur_lr: 5.000000000000001e-05
          entropy: 0.39238172554307515
          entropy_coeff: 0.009999999999999998
          kl: 0.007384339392105548
          policy_loss: 0.00045241978433397084
          total_loss: 0.6012528747320175
          vf_explained_var: 0.4723303020000458
          vf_loss: 0.6016575379504098
    num_agent_steps_sampled: 1580000
    num_agent_steps_trained: 1580000
    num_steps_sampled: 1580000
    num_steps_trained: 158

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1580,42823.8,1580000,-30.716,-22,-51.1,307.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1581000
  custom_metrics: {}
  date: 2021-10-29_09-00-48
  done: false
  episode_len_mean: 302.6
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -30.260000000000158
  episode_reward_min: -51.100000000000456
  episodes_this_iter: 4
  episodes_total: 5324
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41530254376182996
          cur_lr: 5.000000000000001e-05
          entropy: 0.7031898730330997
          entropy_coeff: 0.009999999999999998
          kl: 0.00709686104131987
          policy_loss: 0.008079812592930264
          total_loss: 0.6263297657171886
          vf_explained_var: 0.5436984300613403
          vf_loss: 0.6223345067765977
    num_agent_steps_sampled: 1581000
    num_agent_steps_trained: 1581000
    num_steps_sampled: 1581000
    num_steps_trained: 1581000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1581,42853.3,1581000,-30.26,-22,-51.1,302.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1582000
  custom_metrics: {}
  date: 2021-10-29_09-01-15
  done: false
  episode_len_mean: 301.0
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -30.100000000000158
  episode_reward_min: -51.100000000000456
  episodes_this_iter: 3
  episodes_total: 5327
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41530254376182996
          cur_lr: 5.000000000000001e-05
          entropy: 0.5982474654912948
          entropy_coeff: 0.009999999999999998
          kl: 0.0035401889864975227
          policy_loss: 0.05499634974532657
          total_loss: 0.5943325801028145
          vf_explained_var: 0.5100197792053223
          vf_loss: 0.5438484610782729
    num_agent_steps_sampled: 1582000
    num_agent_steps_trained: 1582000
    num_steps_sampled: 1582000
    num_steps_trained: 1582000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1582,42880.5,1582000,-30.1,-22,-51.1,301


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1583000
  custom_metrics: {}
  date: 2021-10-29_09-01-41
  done: false
  episode_len_mean: 297.84
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -29.784000000000155
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 4
  episodes_total: 5331
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20765127188091498
          cur_lr: 5.000000000000001e-05
          entropy: 0.819272306892607
          entropy_coeff: 0.009999999999999998
          kl: 0.024021289099220727
          policy_loss: 0.03317895829677582
          total_loss: 0.8594272434711456
          vf_explained_var: 0.4363463819026947
          vf_loss: 0.8294529411527846
    num_agent_steps_sampled: 1583000
    num_agent_steps_trained: 1583000
    num_steps_sampled: 1583000
    num_steps_trained: 1583000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1583,42906.9,1583000,-29.784,-22,-46.6,297.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1584000
  custom_metrics: {}
  date: 2021-10-29_09-02-10
  done: false
  episode_len_mean: 295.84
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -29.584000000000145
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 4
  episodes_total: 5335
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3114769078213725
          cur_lr: 5.000000000000001e-05
          entropy: 0.6776162604490916
          entropy_coeff: 0.009999999999999998
          kl: 0.01883153086708502
          policy_loss: -0.004723894099394481
          total_loss: 0.8236740036143197
          vf_explained_var: 0.3145419955253601
          vf_loss: 0.8293084700902303
    num_agent_steps_sampled: 1584000
    num_agent_steps_trained: 1584000
    num_steps_sampled: 1584000
    num_steps_trained: 1584000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1584,42935.3,1584000,-29.584,-22,-46.6,295.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1585000
  custom_metrics: {}
  date: 2021-10-29_09-02-33
  done: false
  episode_len_mean: 296.03
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -29.603000000000147
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 3
  episodes_total: 5338
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3114769078213725
          cur_lr: 5.000000000000001e-05
          entropy: 0.7320508393976424
          entropy_coeff: 0.009999999999999998
          kl: 0.013593828144009102
          policy_loss: 0.07475634912649791
          total_loss: 0.6155247045887842
          vf_explained_var: 0.24200303852558136
          vf_loss: 0.5438547017673652
    num_agent_steps_sampled: 1585000
    num_agent_steps_trained: 1585000
    num_steps_sampled: 1585000
    num_steps_trained: 1585000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1585,42958.7,1585000,-29.603,-22,-46.6,296.03




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1586000
  custom_metrics: {}
  date: 2021-10-29_09-03-14
  done: false
  episode_len_mean: 296.81
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -29.681000000000154
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 3
  episodes_total: 5341
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3114769078213725
          cur_lr: 5.000000000000001e-05
          entropy: 0.7403201659520467
          entropy_coeff: 0.009999999999999998
          kl: 0.010564770930223549
          policy_loss: 0.03224132673607932
          total_loss: 0.9421823832723829
          vf_explained_var: 0.15290682017803192
          vf_loss: 0.9140535728798972
    num_agent_steps_sampled: 1586000
    num_agent_steps_trained: 1586000
    num_steps_sampled: 1586000
    num_steps_trained: 1586000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1586,42999.5,1586000,-29.681,-22,-46.6,296.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1587000
  custom_metrics: {}
  date: 2021-10-29_09-03-39
  done: false
  episode_len_mean: 292.51
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -29.25100000000014
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 3
  episodes_total: 5344
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3114769078213725
          cur_lr: 5.000000000000001e-05
          entropy: 0.40819043583340114
          entropy_coeff: 0.009999999999999998
          kl: 0.011813297344862958
          policy_loss: -0.10310624291499455
          total_loss: 0.6083473960558573
          vf_explained_var: 0.5481910109519958
          vf_loss: 0.7118559797604879
    num_agent_steps_sampled: 1587000
    num_agent_steps_trained: 1587000
    num_steps_sampled: 1587000
    num_steps_trained: 1587000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1587,43024.7,1587000,-29.251,-22,-46.6,292.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1588000
  custom_metrics: {}
  date: 2021-10-29_09-04-07
  done: false
  episode_len_mean: 291.94
  episode_media: {}
  episode_reward_max: -22.000000000000043
  episode_reward_mean: -29.19400000000014
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 4
  episodes_total: 5348
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3114769078213725
          cur_lr: 5.000000000000001e-05
          entropy: 0.5079785962899526
          entropy_coeff: 0.009999999999999998
          kl: 0.019743437702129482
          policy_loss: -0.034893036882082624
          total_loss: 1.202353380786048
          vf_explained_var: 0.3581750988960266
          vf_loss: 1.2361765755547418
    num_agent_steps_sampled: 1588000
    num_agent_steps_trained: 1588000
    num_steps_sampled: 1588000
    num_steps_trained: 1588000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1588,43053.1,1588000,-29.194,-22,-46.6,291.94


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1589000
  custom_metrics: {}
  date: 2021-10-29_09-04-34
  done: false
  episode_len_mean: 289.79
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.97900000000014
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 4
  episodes_total: 5352
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3114769078213725
          cur_lr: 5.000000000000001e-05
          entropy: 0.5500399033228557
          entropy_coeff: 0.009999999999999998
          kl: 0.009225404668516844
          policy_loss: -0.04103704003824128
          total_loss: 0.8528685728708904
          vf_explained_var: 0.47470054030418396
          vf_loss: 0.8965325097242991
    num_agent_steps_sampled: 1589000
    num_agent_steps_trained: 1589000
    num_steps_sampled: 1589000
    num_steps_trained: 1589000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1589,43079.3,1589000,-28.979,-22.1,-46.6,289.79


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1590000
  custom_metrics: {}
  date: 2021-10-29_09-05-02
  done: false
  episode_len_mean: 289.99
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.999000000000148
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 3
  episodes_total: 5355
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3114769078213725
          cur_lr: 5.000000000000001e-05
          entropy: 0.4296058628294203
          entropy_coeff: 0.009999999999999998
          kl: 0.014816422321104975
          policy_loss: -0.10399621211820179
          total_loss: 0.645036386201779
          vf_explained_var: 0.5950190424919128
          vf_loss: 0.7487136819296413
    num_agent_steps_sampled: 1590000
    num_agent_steps_trained: 1590000
    num_steps_sampled: 1590000
    num_steps_trained: 1590000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1590,43107.6,1590000,-28.999,-22.1,-46.6,289.99


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1591000
  custom_metrics: {}
  date: 2021-10-29_09-05-27
  done: false
  episode_len_mean: 288.92
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.892000000000138
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 4
  episodes_total: 5359
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3114769078213725
          cur_lr: 5.000000000000001e-05
          entropy: 0.564864667918947
          entropy_coeff: 0.009999999999999998
          kl: 0.01854729020979396
          policy_loss: 0.04735715347859595
          total_loss: 0.8502631578180525
          vf_explained_var: 0.635770320892334
          vf_loss: 0.8027775996261173
    num_agent_steps_sampled: 1591000
    num_agent_steps_trained: 1591000
    num_steps_sampled: 1591000
    num_steps_trained: 1591000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1591,43132.5,1591000,-28.892,-22.1,-46.6,288.92


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1592000
  custom_metrics: {}
  date: 2021-10-29_09-05-54
  done: false
  episode_len_mean: 289.1
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.910000000000142
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 3
  episodes_total: 5362
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3114769078213725
          cur_lr: 5.000000000000001e-05
          entropy: 0.5626850028832754
          entropy_coeff: 0.009999999999999998
          kl: 0.026638960559170737
          policy_loss: -0.03274777556459109
          total_loss: 0.7100231834583812
          vf_explained_var: 0.47623953223228455
          vf_loss: 0.740100384172466
    num_agent_steps_sampled: 1592000
    num_agent_steps_trained: 1592000
    num_steps_sampled: 1592000
    num_steps_trained: 1592000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1592,43159.8,1592000,-28.91,-22.1,-46.6,289.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1593000
  custom_metrics: {}
  date: 2021-10-29_09-06-22
  done: false
  episode_len_mean: 289.79
  episode_media: {}
  episode_reward_max: -22.100000000000044
  episode_reward_mean: -28.979000000000138
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 4
  episodes_total: 5366
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46721536173205885
          cur_lr: 5.000000000000001e-05
          entropy: 0.42138867610030706
          entropy_coeff: 0.009999999999999998
          kl: 0.0038810640997704285
          policy_loss: -0.05212599808971087
          total_loss: 0.6996495028336843
          vf_explained_var: 0.598778486251831
          vf_loss: 0.7541760891675949
    num_agent_steps_sampled: 1593000
    num_agent_steps_trained: 1593000
    num_steps_sampled: 1593000
    num_steps_trained: 159300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1593,43187.4,1593000,-28.979,-22.1,-46.6,289.79




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1594000
  custom_metrics: {}
  date: 2021-10-29_09-07-07
  done: false
  episode_len_mean: 287.73
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -28.77300000000014
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 4
  episodes_total: 5370
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23360768086602943
          cur_lr: 5.000000000000001e-05
          entropy: 0.32456727110677297
          entropy_coeff: 0.009999999999999998
          kl: 0.003474601262337905
          policy_loss: 0.14162228140566085
          total_loss: 0.6805695593357086
          vf_explained_var: 0.6325388550758362
          vf_loss: 0.5413812451892429
    num_agent_steps_sampled: 1594000
    num_agent_steps_trained: 1594000
    num_steps_sampled: 1594000
    num_steps_trained: 1594000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1594,43232.1,1594000,-28.773,-20.8,-46.6,287.73


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1595000
  custom_metrics: {}
  date: 2021-10-29_09-07-37
  done: false
  episode_len_mean: 287.96
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -28.796000000000134
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 4
  episodes_total: 5374
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.11680384043301471
          cur_lr: 5.000000000000001e-05
          entropy: 0.5774775253401863
          entropy_coeff: 0.009999999999999998
          kl: 0.033753758675581114
          policy_loss: -0.02370481135116683
          total_loss: 0.785357344812817
          vf_explained_var: 0.3844536244869232
          vf_loss: 0.8108943591515223
    num_agent_steps_sampled: 1595000
    num_agent_steps_trained: 1595000
    num_steps_sampled: 1595000
    num_steps_trained: 1595000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1595,43262.1,1595000,-28.796,-20.8,-46.6,287.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1596000
  custom_metrics: {}
  date: 2021-10-29_09-07-56
  done: false
  episode_len_mean: 288.34
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -28.83400000000013
  episode_reward_min: -46.60000000000039
  episodes_this_iter: 2
  episodes_total: 5376
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17520576064952206
          cur_lr: 5.000000000000001e-05
          entropy: 0.548478341433737
          entropy_coeff: 0.009999999999999998
          kl: 0.01634276248646371
          policy_loss: -0.002805953472852707
          total_loss: 0.4802406128909853
          vf_explained_var: 0.7188769578933716
          vf_loss: 0.4856680038074652
    num_agent_steps_sampled: 1596000
    num_agent_steps_trained: 1596000
    num_steps_sampled: 1596000
    num_steps_trained: 1596000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1596,43281.2,1596000,-28.834,-20.8,-46.6,288.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1597000
  custom_metrics: {}
  date: 2021-10-29_09-08-06
  done: false
  episode_len_mean: 293.4
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -29.34000000000013
  episode_reward_min: -82.59999999999958
  episodes_this_iter: 1
  episodes_total: 5377
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17520576064952206
          cur_lr: 5.000000000000001e-05
          entropy: 0.26751398162709344
          entropy_coeff: 0.009999999999999998
          kl: 0.0014322492909185612
          policy_loss: -0.00931515163845486
          total_loss: 0.7576259420977698
          vf_explained_var: -0.00020542873244266957
          vf_loss: 0.7693652809235371
    num_agent_steps_sampled: 1597000
    num_agent_steps_trained: 1597000
    num_steps_sampled: 1597000
    num_steps_trained: 15

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1597,43291.7,1597000,-29.34,-20.8,-82.6,293.4


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1598000
  custom_metrics: {}
  date: 2021-10-29_09-08-22
  done: false
  episode_len_mean: 295.74
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -29.574000000000133
  episode_reward_min: -82.59999999999958
  episodes_this_iter: 2
  episodes_total: 5379
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08760288032476103
          cur_lr: 5.000000000000001e-05
          entropy: 0.25279089676009286
          entropy_coeff: 0.009999999999999998
          kl: 0.01699273857196041
          policy_loss: -0.06887510766585668
          total_loss: 0.404231592019399
          vf_explained_var: 0.359561949968338
          vf_loss: 0.4741459992196825
    num_agent_steps_sampled: 1598000
    num_agent_steps_trained: 1598000
    num_steps_sampled: 1598000
    num_steps_trained: 1598000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1598,43307.6,1598000,-29.574,-20.8,-82.6,295.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1599000
  custom_metrics: {}
  date: 2021-10-29_09-08-33
  done: false
  episode_len_mean: 302.44
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -30.24400000000012
  episode_reward_min: -97.39999999999874
  episodes_this_iter: 1
  episodes_total: 5380
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08760288032476103
          cur_lr: 5.000000000000001e-05
          entropy: 0.20597258590989642
          entropy_coeff: 0.009999999999999998
          kl: 0.0012938758904147345
          policy_loss: -0.042441501054498884
          total_loss: 0.5085845952232678
          vf_explained_var: -0.2772044539451599
          vf_loss: 0.5529724710103538
    num_agent_steps_sampled: 1599000
    num_agent_steps_trained: 1599000
    num_steps_sampled: 1599000
    num_steps_trained: 1599

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1599,43318.3,1599000,-30.244,-20.8,-97.4,302.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1600000
  custom_metrics: {}
  date: 2021-10-29_09-08-51
  done: false
  episode_len_mean: 309.8
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -30.98000000000011
  episode_reward_min: -97.39999999999874
  episodes_this_iter: 3
  episodes_total: 5383
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.043801440162380516
          cur_lr: 5.000000000000001e-05
          entropy: 0.2943351157837444
          entropy_coeff: 0.009999999999999998
          kl: 0.0026813553842853504
          policy_loss: -0.02261717501613829
          total_loss: 0.8685668925444285
          vf_explained_var: 0.5492063760757446
          vf_loss: 0.8940099697973993
    num_agent_steps_sampled: 1600000
    num_agent_steps_trained: 1600000
    num_steps_sampled: 1600000
    num_steps_trained: 1600000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1600,43336.5,1600000,-30.98,-20.8,-97.4,309.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1601000
  custom_metrics: {}
  date: 2021-10-29_09-09-12
  done: false
  episode_len_mean: 308.85
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -30.885000000000105
  episode_reward_min: -97.39999999999874
  episodes_this_iter: 2
  episodes_total: 5385
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021900720081190258
          cur_lr: 5.000000000000001e-05
          entropy: 0.3392515664299329
          entropy_coeff: 0.009999999999999998
          kl: 0.015468212553613291
          policy_loss: 0.0009977239701482986
          total_loss: 0.5669093103872405
          vf_explained_var: 0.7510334253311157
          vf_loss: 0.5689653422269556
    num_agent_steps_sampled: 1601000
    num_agent_steps_trained: 1601000
    num_steps_sampled: 1601000
    num_steps_trained: 16010

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1601,43357.5,1601000,-30.885,-20.8,-97.4,308.85


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1602000
  custom_metrics: {}
  date: 2021-10-29_09-09-33
  done: false
  episode_len_mean: 308.67
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -30.867000000000104
  episode_reward_min: -97.39999999999874
  episodes_this_iter: 3
  episodes_total: 5388
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021900720081190258
          cur_lr: 5.000000000000001e-05
          entropy: 0.39263091882069906
          entropy_coeff: 0.009999999999999998
          kl: 0.01972695788150567
          policy_loss: -0.025770627955595652
          total_loss: 0.7807585981157091
          vf_explained_var: 0.6704418063163757
          vf_loss: 0.8100235005219777
    num_agent_steps_sampled: 1602000
    num_agent_steps_trained: 1602000
    num_steps_sampled: 1602000
    num_steps_trained: 16020

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1602,43378.4,1602000,-30.867,-20.8,-97.4,308.67


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1603000
  custom_metrics: {}
  date: 2021-10-29_09-09-46
  done: false
  episode_len_mean: 309.09
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -30.909000000000102
  episode_reward_min: -97.39999999999874
  episodes_this_iter: 1
  episodes_total: 5389
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.021900720081190258
          cur_lr: 5.000000000000001e-05
          entropy: 0.2522665061884456
          entropy_coeff: 0.009999999999999998
          kl: 0.0036614373516321227
          policy_loss: -0.04750772234466341
          total_loss: 0.18590829943617185
          vf_explained_var: 0.19479936361312866
          vf_loss: 0.23585849863787492
    num_agent_steps_sampled: 1603000
    num_agent_steps_trained: 1603000
    num_steps_sampled: 1603000
    num_steps_trained: 16

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1603,43391.2,1603000,-30.909,-20.8,-97.4,309.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1604000
  custom_metrics: {}
  date: 2021-10-29_09-09-56
  done: false
  episode_len_mean: 316.35
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -31.63500000000009
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5390
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010950360040595129
          cur_lr: 5.000000000000001e-05
          entropy: 0.26174795313013927
          entropy_coeff: 0.009999999999999998
          kl: 0.0016664777712591672
          policy_loss: -0.05190793159935209
          total_loss: 0.5776779060562451
          vf_explained_var: -0.5681164860725403
          vf_loss: 0.6321850741385586
    num_agent_steps_sampled: 1604000
    num_agent_steps_trained: 1604000
    num_steps_sampled: 1604000
    num_steps_trained: 1604

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1604,43401.6,1604000,-31.635,-20.8,-97.7,316.35


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1605000
  custom_metrics: {}
  date: 2021-10-29_09-10-11
  done: false
  episode_len_mean: 324.14
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -32.41400000000009
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5392
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0054751800202975645
          cur_lr: 5.000000000000001e-05
          entropy: 0.33446113069852196
          entropy_coeff: 0.009999999999999998
          kl: 0.01955035393813173
          policy_loss: -0.07516465493374401
          total_loss: 0.9101323856247796
          vf_explained_var: -0.04960129037499428
          vf_loss: 0.9885346175067955
    num_agent_steps_sampled: 1605000
    num_agent_steps_trained: 1605000
    num_steps_sampled: 1605000
    num_steps_trained: 1605

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1605,43415.8,1605000,-32.414,-20.8,-97.7,324.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1606000
  custom_metrics: {}
  date: 2021-10-29_09-10-26
  done: false
  episode_len_mean: 331.16
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -33.11600000000008
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5394
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0054751800202975645
          cur_lr: 5.000000000000001e-05
          entropy: 0.23246209174394608
          entropy_coeff: 0.009999999999999998
          kl: 0.03539167287697625
          policy_loss: -0.04568118900060654
          total_loss: 0.8493595133225124
          vf_explained_var: 0.05350833386182785
          vf_loss: 0.8971715463532342
    num_agent_steps_sampled: 1606000
    num_agent_steps_trained: 1606000
    num_steps_sampled: 1606000
    num_steps_trained: 16060

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1606,43431.5,1606000,-33.116,-20.8,-97.7,331.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1607000
  custom_metrics: {}
  date: 2021-10-29_09-10-43
  done: false
  episode_len_mean: 337.28
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -33.728000000000065
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5396
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008212770030446346
          cur_lr: 5.000000000000001e-05
          entropy: 0.22974115990930133
          entropy_coeff: 0.009999999999999998
          kl: 0.01551153468480708
          policy_loss: 0.010525790684752994
          total_loss: 0.7126385915610526
          vf_explained_var: -0.6064134240150452
          vf_loss: 0.7042828049096796
    num_agent_steps_sampled: 1607000
    num_agent_steps_trained: 1607000
    num_steps_sampled: 1607000
    num_steps_trained: 16070

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1607,43448.3,1607000,-33.728,-20.8,-97.7,337.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1608000
  custom_metrics: {}
  date: 2021-10-29_09-10-56
  done: false
  episode_len_mean: 338.29
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -33.829000000000065
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5397
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008212770030446346
          cur_lr: 5.000000000000001e-05
          entropy: 0.7412848154703776
          entropy_coeff: 0.009999999999999998
          kl: 0.06411910596518713
          policy_loss: -0.03230643015768793
          total_loss: 0.36847140772475134
          vf_explained_var: 0.001068285433575511
          vf_loss: 0.40766409532063536
    num_agent_steps_sampled: 1608000
    num_agent_steps_trained: 1608000
    num_steps_sampled: 1608000
    num_steps_trained: 160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1608,43461.4,1608000,-33.829,-20.8,-97.7,338.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1609000
  custom_metrics: {}
  date: 2021-10-29_09-11-09
  done: false
  episode_len_mean: 348.05
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -34.805000000000064
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5399
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012319155045669522
          cur_lr: 5.000000000000001e-05
          entropy: 0.5787348038620419
          entropy_coeff: 0.009999999999999998
          kl: 0.022738076061065766
          policy_loss: 0.016408745778931512
          total_loss: 0.6911563273933199
          vf_explained_var: -0.5854796767234802
          vf_loss: 0.6802548014041451
    num_agent_steps_sampled: 1609000
    num_agent_steps_trained: 1609000
    num_steps_sampled: 1609000
    num_steps_trained: 16090

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1609,43474.3,1609000,-34.805,-20.8,-97.7,348.05




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1610000
  custom_metrics: {}
  date: 2021-10-29_09-11-42
  done: false
  episode_len_mean: 353.14
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -35.31400000000007
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5401
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.018478732568504273
          cur_lr: 5.000000000000001e-05
          entropy: 1.1002294010586209
          entropy_coeff: 0.009999999999999998
          kl: 0.017595576972787005
          policy_loss: 0.11952061785591973
          total_loss: 0.492818694975641
          vf_explained_var: 0.0894562378525734
          vf_loss: 0.38397521977312865
    num_agent_steps_sampled: 1610000
    num_agent_steps_trained: 1610000
    num_steps_sampled: 1610000
    num_steps_trained: 1610000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1610,43507.6,1610000,-35.314,-20.8,-97.7,353.14


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1611000
  custom_metrics: {}
  date: 2021-10-29_09-12-05
  done: false
  episode_len_mean: 354.44
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -35.44400000000007
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 3
  episodes_total: 5404
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.018478732568504273
          cur_lr: 5.000000000000001e-05
          entropy: 1.315683185391956
          entropy_coeff: 0.009999999999999998
          kl: 0.015222518166107098
          policy_loss: 0.07805822686188751
          total_loss: 1.0044380482700137
          vf_explained_var: -0.057613175362348557
          vf_loss: 0.9392553544085886
    num_agent_steps_sampled: 1611000
    num_agent_steps_trained: 1611000
    num_steps_sampled: 1611000
    num_steps_trained: 161100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1611,43529.8,1611000,-35.444,-20.8,-97.7,354.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1612000
  custom_metrics: {}
  date: 2021-10-29_09-12-25
  done: false
  episode_len_mean: 355.55
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -35.55500000000008
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5406
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.018478732568504273
          cur_lr: 5.000000000000001e-05
          entropy: 1.334783795144823
          entropy_coeff: 0.009999999999999998
          kl: 0.017620747848231765
          policy_loss: -0.08808906806839836
          total_loss: 0.9131090528435177
          vf_explained_var: -0.21205224096775055
          vf_loss: 1.0142203503184848
    num_agent_steps_sampled: 1612000
    num_agent_steps_trained: 1612000
    num_steps_sampled: 1612000
    num_steps_trained: 161200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1612,43550.3,1612000,-35.555,-20.8,-97.7,355.55


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1613000
  custom_metrics: {}
  date: 2021-10-29_09-12-44
  done: false
  episode_len_mean: 359.95
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -35.995000000000076
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 3
  episodes_total: 5409
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.018478732568504273
          cur_lr: 5.000000000000001e-05
          entropy: 1.6033427543110317
          entropy_coeff: 0.009999999999999998
          kl: 0.04119765424441273
          policy_loss: 0.05888959326677852
          total_loss: 0.8672455310821533
          vf_explained_var: -0.049034975469112396
          vf_loss: 0.8236280761555665
    num_agent_steps_sampled: 1613000
    num_agent_steps_trained: 1613000
    num_steps_sampled: 1613000
    num_steps_trained: 16130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1613,43568.9,1613000,-35.995,-20.8,-97.7,359.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1614000
  custom_metrics: {}
  date: 2021-10-29_09-13-02
  done: false
  episode_len_mean: 361.97
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -36.197000000000074
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5411
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.027718098852756407
          cur_lr: 5.000000000000001e-05
          entropy: 1.073198640346527
          entropy_coeff: 0.009999999999999998
          kl: 0.06366137392303423
          policy_loss: -0.0012100507815678914
          total_loss: 0.9056203507714802
          vf_explained_var: 0.12665694952011108
          vf_loss: 0.9157978129055765
    num_agent_steps_sampled: 1614000
    num_agent_steps_trained: 1614000
    num_steps_sampled: 1614000
    num_steps_trained: 16140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1614,43586.7,1614000,-36.197,-20.8,-97.7,361.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1615000
  custom_metrics: {}
  date: 2021-10-29_09-13-16
  done: false
  episode_len_mean: 368.08
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -36.808000000000085
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5413
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04157714827913461
          cur_lr: 5.000000000000001e-05
          entropy: 1.238268706533644
          entropy_coeff: 0.009999999999999998
          kl: 0.015104853990903264
          policy_loss: 0.08204598757955762
          total_loss: 0.8012527998950746
          vf_explained_var: -0.4847415089607239
          vf_loss: 0.7309614946030908
    num_agent_steps_sampled: 1615000
    num_agent_steps_trained: 1615000
    num_steps_sampled: 1615000
    num_steps_trained: 1615000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1615,43600.9,1615000,-36.808,-20.8,-97.7,368.08


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1616000
  custom_metrics: {}
  date: 2021-10-29_09-13-32
  done: false
  episode_len_mean: 369.46
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -36.94600000000009
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5414
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04157714827913461
          cur_lr: 5.000000000000001e-05
          entropy: 1.2426714128918117
          entropy_coeff: 0.009999999999999998
          kl: 0.01818419695054799
          policy_loss: -0.055783332222037844
          total_loss: 0.5859108183119032
          vf_explained_var: -0.35512009263038635
          vf_loss: 0.653364829501758
    num_agent_steps_sampled: 1616000
    num_agent_steps_trained: 1616000
    num_steps_sampled: 1616000
    num_steps_trained: 1616000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1616,43616.9,1616000,-36.946,-20.8,-97.7,369.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1617000
  custom_metrics: {}
  date: 2021-10-29_09-13-49
  done: false
  episode_len_mean: 377.78
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -37.778000000000105
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 3
  episodes_total: 5417
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04157714827913461
          cur_lr: 5.000000000000001e-05
          entropy: 1.1865897874037425
          entropy_coeff: 0.009999999999999998
          kl: 0.01236462694084229
          policy_loss: 0.04363376994927724
          total_loss: 1.3850145426061418
          vf_explained_var: -0.2907394766807556
          vf_loss: 1.352732587274578
    num_agent_steps_sampled: 1617000
    num_agent_steps_trained: 1617000
    num_steps_sampled: 1617000
    num_steps_trained: 1617000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1617,43633.9,1617000,-37.778,-20.8,-97.7,377.78


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1618000
  custom_metrics: {}
  date: 2021-10-29_09-14-09
  done: false
  episode_len_mean: 379.5
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -37.95000000000011
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5419
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04157714827913461
          cur_lr: 5.000000000000001e-05
          entropy: 1.5330371260643005
          entropy_coeff: 0.009999999999999998
          kl: 0.02457312666437193
          policy_loss: -0.0049472227692604065
          total_loss: 0.8064575751622518
          vf_explained_var: 0.18015840649604797
          vf_loss: 0.8257134809676144
    num_agent_steps_sampled: 1618000
    num_agent_steps_trained: 1618000
    num_steps_sampled: 1618000
    num_steps_trained: 1618000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1618,43653.7,1618000,-37.95,-20.8,-97.7,379.5


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1619000
  custom_metrics: {}
  date: 2021-10-29_09-14-23
  done: false
  episode_len_mean: 388.07
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -38.80700000000011
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5421
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.062365722418701926
          cur_lr: 5.000000000000001e-05
          entropy: 1.4327497124671935
          entropy_coeff: 0.009999999999999998
          kl: 0.011621406673756912
          policy_loss: 0.08932776865031984
          total_loss: 0.902818379799525
          vf_explained_var: -0.6308490037918091
          vf_loss: 0.8270933429400126
    num_agent_steps_sampled: 1619000
    num_agent_steps_trained: 1619000
    num_steps_sampled: 1619000
    num_steps_trained: 1619000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1619,43667.6,1619000,-38.807,-20.8,-97.7,388.07


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1620000
  custom_metrics: {}
  date: 2021-10-29_09-14-44
  done: false
  episode_len_mean: 389.21
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -38.921000000000106
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5423
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.062365722418701926
          cur_lr: 5.000000000000001e-05
          entropy: 1.22169461912579
          entropy_coeff: 0.009999999999999998
          kl: 0.02844955800908781
          policy_loss: -0.04859881160987748
          total_loss: 1.0414801836013794
          vf_explained_var: -0.015682144090533257
          vf_loss: 1.100521650744809
    num_agent_steps_sampled: 1620000
    num_agent_steps_trained: 1620000
    num_steps_sampled: 1620000
    num_steps_trained: 1620000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1620,43689.2,1620000,-38.921,-20.8,-97.7,389.21


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1621000
  custom_metrics: {}
  date: 2021-10-29_09-14-59
  done: false
  episode_len_mean: 394.9
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -39.490000000000116
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5425
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09354858362805292
          cur_lr: 5.000000000000001e-05
          entropy: 1.269955724477768
          entropy_coeff: 0.009999999999999998
          kl: 0.03419487118750254
          policy_loss: -0.11553120993905597
          total_loss: 1.0040541698535284
          vf_explained_var: -0.07943212985992432
          vf_loss: 1.1290860421127744
    num_agent_steps_sampled: 1621000
    num_agent_steps_trained: 1621000
    num_steps_sampled: 1621000
    num_steps_trained: 1621000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1621,43704.2,1621000,-39.49,-20.8,-97.7,394.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1622000
  custom_metrics: {}
  date: 2021-10-29_09-15-19
  done: false
  episode_len_mean: 398.34
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -39.834000000000124
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5427
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14032287544207936
          cur_lr: 5.000000000000001e-05
          entropy: 1.4027480721473693
          entropy_coeff: 0.009999999999999998
          kl: 0.03569705033440521
          policy_loss: -0.034792699582046935
          total_loss: 0.8765757941537433
          vf_explained_var: 0.07671479880809784
          vf_loss: 0.9203868572703666
    num_agent_steps_sampled: 1622000
    num_agent_steps_trained: 1622000
    num_steps_sampled: 1622000
    num_steps_trained: 162200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1622,43723.8,1622000,-39.834,-20.8,-97.7,398.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1623000
  custom_metrics: {}
  date: 2021-10-29_09-15-33
  done: false
  episode_len_mean: 403.61
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -40.36100000000013
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5429
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21048431316311905
          cur_lr: 5.000000000000001e-05
          entropy: 1.3528468635347155
          entropy_coeff: 0.009999999999999998
          kl: 0.018478795438782728
          policy_loss: -0.12018495959540208
          total_loss: 0.6778898235824373
          vf_explained_var: -0.05864459648728371
          vf_loss: 0.8077137540611956
    num_agent_steps_sampled: 1623000
    num_agent_steps_trained: 1623000
    num_steps_sampled: 1623000
    num_steps_trained: 162300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1623,43737.8,1623000,-40.361,-20.8,-97.7,403.61




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1624000
  custom_metrics: {}
  date: 2021-10-29_09-16-10
  done: false
  episode_len_mean: 404.83
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -40.48300000000013
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5431
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.21048431316311905
          cur_lr: 5.000000000000001e-05
          entropy: 1.2942184143596225
          entropy_coeff: 0.009999999999999998
          kl: 0.02345930568224589
          policy_loss: -0.051167215241326225
          total_loss: 0.5082835089829233
          vf_explained_var: 0.3212367296218872
          vf_loss: 0.5674550916223476
    num_agent_steps_sampled: 1624000
    num_agent_steps_trained: 1624000
    num_steps_sampled: 1624000
    num_steps_trained: 1624000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1624,43774.5,1624000,-40.483,-20.8,-97.7,404.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1625000
  custom_metrics: {}
  date: 2021-10-29_09-16-30
  done: false
  episode_len_mean: 410.5
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -41.05000000000014
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 3
  episodes_total: 5434
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3157264697446785
          cur_lr: 5.000000000000001e-05
          entropy: 1.326061834891637
          entropy_coeff: 0.009999999999999998
          kl: 0.019574935904833918
          policy_loss: -0.010869669500324461
          total_loss: 0.8281656834814284
          vf_explained_var: 0.2900940775871277
          vf_loss: 0.8461156424548891
    num_agent_steps_sampled: 1625000
    num_agent_steps_trained: 1625000
    num_steps_sampled: 1625000
    num_steps_trained: 1625000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1625,43794.4,1625000,-41.05,-20.8,-97.7,410.5


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1626000
  custom_metrics: {}
  date: 2021-10-29_09-16-43
  done: false
  episode_len_mean: 415.33
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -41.53300000000013
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5435
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3157264697446785
          cur_lr: 5.000000000000001e-05
          entropy: 1.2480909877353243
          entropy_coeff: 0.009999999999999998
          kl: 0.08011722003220946
          policy_loss: -0.015351198199722502
          total_loss: 0.6649616441792912
          vf_explained_var: 0.4738476574420929
          vf_loss: 0.6674986223379771
    num_agent_steps_sampled: 1626000
    num_agent_steps_trained: 1626000
    num_steps_sampled: 1626000
    num_steps_trained: 1626000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1626,43807.3,1626000,-41.533,-20.8,-97.7,415.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1627000
  custom_metrics: {}
  date: 2021-10-29_09-16-55
  done: false
  episode_len_mean: 423.55
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -42.355000000000125
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5437
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.47358970461701777
          cur_lr: 5.000000000000001e-05
          entropy: 1.4310744020673964
          entropy_coeff: 0.009999999999999998
          kl: 0.007885821703864771
          policy_loss: 0.12411249884300762
          total_loss: 0.5499509460396237
          vf_explained_var: 0.23595713078975677
          vf_loss: 0.4364145593924655
    num_agent_steps_sampled: 1627000
    num_agent_steps_trained: 1627000
    num_steps_sampled: 1627000
    num_steps_trained: 1627000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1627,43820,1627000,-42.355,-20.8,-97.7,423.55


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1628000
  custom_metrics: {}
  date: 2021-10-29_09-17-07
  done: false
  episode_len_mean: 427.05
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -42.70500000000013
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5438
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.47358970461701777
          cur_lr: 5.000000000000001e-05
          entropy: 1.2996506578392453
          entropy_coeff: 0.009999999999999998
          kl: 0.008828638214950531
          policy_loss: -0.09586315502723058
          total_loss: 0.48935219239857464
          vf_explained_var: -0.2907785475254059
          vf_loss: 0.5940307084884908
    num_agent_steps_sampled: 1628000
    num_agent_steps_trained: 1628000
    num_steps_sampled: 1628000
    num_steps_trained: 162800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1628,43831.9,1628000,-42.705,-20.8,-97.7,427.05


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1629000
  custom_metrics: {}
  date: 2021-10-29_09-17-20
  done: false
  episode_len_mean: 433.16
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -43.316000000000116
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5439
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.47358970461701777
          cur_lr: 5.000000000000001e-05
          entropy: 0.6291381448507309
          entropy_coeff: 0.009999999999999998
          kl: 0.0068020519651224715
          policy_loss: -0.14227486033406522
          total_loss: 0.4375183545053005
          vf_explained_var: -0.6688932180404663
          vf_loss: 0.5828632177164157
    num_agent_steps_sampled: 1629000
    num_agent_steps_trained: 1629000
    num_steps_sampled: 1629000
    num_steps_trained: 16290

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1629,43844.9,1629000,-43.316,-20.8,-97.7,433.16


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1630000
  custom_metrics: {}
  date: 2021-10-29_09-17-33
  done: false
  episode_len_mean: 438.71
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -43.871000000000116
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5441
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.47358970461701777
          cur_lr: 5.000000000000001e-05
          entropy: 1.4722012056244744
          entropy_coeff: 0.009999999999999998
          kl: 0.008805042270720678
          policy_loss: 0.07239379063248634
          total_loss: 0.318167515595754
          vf_explained_var: 0.08929046243429184
          vf_loss: 0.256325757674252
    num_agent_steps_sampled: 1630000
    num_agent_steps_trained: 1630000
    num_steps_sampled: 1630000
    num_steps_trained: 1630000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1630,43857.2,1630000,-43.871,-20.8,-97.7,438.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1631000
  custom_metrics: {}
  date: 2021-10-29_09-17-45
  done: false
  episode_len_mean: 442.6
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -44.26000000000012
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5442
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.47358970461701777
          cur_lr: 5.000000000000001e-05
          entropy: 1.47499713367886
          entropy_coeff: 0.009999999999999998
          kl: 0.00764583656872466
          policy_loss: -0.06284442825449837
          total_loss: 0.6426322450240453
          vf_explained_var: -0.6090508103370667
          vf_loss: 0.7166056535827617
    num_agent_steps_sampled: 1631000
    num_agent_steps_trained: 1631000
    num_steps_sampled: 1631000
    num_steps_trained: 1631000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1631,43869.2,1631000,-44.26,-20.8,-97.7,442.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1632000
  custom_metrics: {}
  date: 2021-10-29_09-17-59
  done: false
  episode_len_mean: 451.66
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -45.16600000000013
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5444
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.47358970461701777
          cur_lr: 5.000000000000001e-05
          entropy: 1.4591746674643622
          entropy_coeff: 0.009999999999999998
          kl: 0.004219490979755926
          policy_loss: 0.06348472320371204
          total_loss: 0.7960834099186791
          vf_explained_var: -0.652698278427124
          vf_loss: 0.7451921234114303
    num_agent_steps_sampled: 1632000
    num_agent_steps_trained: 1632000
    num_steps_sampled: 1632000
    num_steps_trained: 1632000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1632,43883.7,1632000,-45.166,-20.8,-97.7,451.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1633000
  custom_metrics: {}
  date: 2021-10-29_09-18-16
  done: false
  episode_len_mean: 455.66
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -45.56600000000013
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5446
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.23679485230850889
          cur_lr: 5.000000000000001e-05
          entropy: 1.3726715087890624
          entropy_coeff: 0.009999999999999998
          kl: 0.024888733169764678
          policy_loss: 0.1892098459932539
          total_loss: 0.5466996712817086
          vf_explained_var: -0.0017503539565950632
          vf_loss: 0.36532301815847557
    num_agent_steps_sampled: 1633000
    num_agent_steps_trained: 1633000
    num_steps_sampled: 1633000
    num_steps_trained: 16330

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1633,43900.5,1633000,-45.566,-20.8,-97.7,455.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1634000
  custom_metrics: {}
  date: 2021-10-29_09-18-28
  done: false
  episode_len_mean: 459.49
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -45.949000000000126
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5447
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3551922784627633
          cur_lr: 5.000000000000001e-05
          entropy: 1.2045902298556435
          entropy_coeff: 0.009999999999999998
          kl: 0.010302658880806505
          policy_loss: -0.0889134426911672
          total_loss: 0.6106652185320854
          vf_explained_var: -0.7019678950309753
          vf_loss: 0.7079651354915566
    num_agent_steps_sampled: 1634000
    num_agent_steps_trained: 1634000
    num_steps_sampled: 1634000
    num_steps_trained: 1634000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1634,43912.3,1634000,-45.949,-20.8,-97.7,459.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1635000
  custom_metrics: {}
  date: 2021-10-29_09-18-39
  done: false
  episode_len_mean: 466.23
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -46.62300000000012
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5448
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3551922784627633
          cur_lr: 5.000000000000001e-05
          entropy: 0.6449609471691979
          entropy_coeff: 0.009999999999999998
          kl: 0.0036964590446829427
          policy_loss: -0.04934225529432297
          total_loss: 0.624994987083806
          vf_explained_var: -0.6207091212272644
          vf_loss: 0.6794738951449593
    num_agent_steps_sampled: 1635000
    num_agent_steps_trained: 1635000
    num_steps_sampled: 1635000
    num_steps_trained: 1635000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1635,43923.7,1635000,-46.623,-20.8,-97.7,466.23


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1636000
  custom_metrics: {}
  date: 2021-10-29_09-18-50
  done: false
  episode_len_mean: 472.44
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -47.244000000000106
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5449
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17759613923138165
          cur_lr: 5.000000000000001e-05
          entropy: 0.6359553913275401
          entropy_coeff: 0.009999999999999998
          kl: 0.005489026007344005
          policy_loss: -0.05522316445906957
          total_loss: 0.6260314229461882
          vf_explained_var: -0.6489281058311462
          vf_loss: 0.6866393200639221
    num_agent_steps_sampled: 1636000
    num_agent_steps_trained: 1636000
    num_steps_sampled: 1636000
    num_steps_trained: 163600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1636,43934.4,1636000,-47.244,-20.8,-97.7,472.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1637000
  custom_metrics: {}
  date: 2021-10-29_09-19-01
  done: false
  episode_len_mean: 478.33
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -47.833000000000105
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5450
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17759613923138165
          cur_lr: 5.000000000000001e-05
          entropy: 1.044794899225235
          entropy_coeff: 0.009999999999999998
          kl: 0.010976042619464563
          policy_loss: -0.044557596743106845
          total_loss: 0.588472698132197
          vf_explained_var: -0.265980988740921
          vf_loss: 0.6415289534152382
    num_agent_steps_sampled: 1637000
    num_agent_steps_trained: 1637000
    num_steps_sampled: 1637000
    num_steps_trained: 1637000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1637,43945.6,1637000,-47.833,-20.8,-97.7,478.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1638000
  custom_metrics: {}
  date: 2021-10-29_09-19-12
  done: false
  episode_len_mean: 489.53
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -48.953000000000095
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5452
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17759613923138165
          cur_lr: 5.000000000000001e-05
          entropy: 0.7033664193418291
          entropy_coeff: 0.009999999999999998
          kl: 0.019839412159252223
          policy_loss: 0.08990464094612334
          total_loss: 0.5005430612299178
          vf_explained_var: -0.45859670639038086
          vf_loss: 0.4141486765609847
    num_agent_steps_sampled: 1638000
    num_agent_steps_trained: 1638000
    num_steps_sampled: 1638000
    num_steps_trained: 163800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1638,43956.6,1638000,-48.953,-20.8,-97.7,489.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1639000
  custom_metrics: {}
  date: 2021-10-29_09-19-25
  done: false
  episode_len_mean: 495.37
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -49.53700000000009
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5453
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.17759613923138165
          cur_lr: 5.000000000000001e-05
          entropy: 0.6257129165861341
          entropy_coeff: 0.009999999999999998
          kl: 0.0038028599373057088
          policy_loss: -0.06803256372610728
          total_loss: 0.6429305929276679
          vf_explained_var: -0.6489390730857849
          vf_loss: 0.7165449108721481
    num_agent_steps_sampled: 1639000
    num_agent_steps_trained: 1639000
    num_steps_sampled: 1639000
    num_steps_trained: 163900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1639,43969.1,1639000,-49.537,-20.8,-97.7,495.37


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1640000
  custom_metrics: {}
  date: 2021-10-29_09-19-37
  done: false
  episode_len_mean: 497.65
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -49.7650000000001
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5454
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08879806961569083
          cur_lr: 5.000000000000001e-05
          entropy: 0.9988058070341747
          entropy_coeff: 0.009999999999999998
          kl: 0.01577539887956938
          policy_loss: -0.024928811606433655
          total_loss: 0.641974682153927
          vf_explained_var: -0.26683589816093445
          vf_loss: 0.6754907211702731
    num_agent_steps_sampled: 1640000
    num_agent_steps_trained: 1640000
    num_steps_sampled: 1640000
    num_steps_trained: 1640000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1640,43981.2,1640000,-49.765,-20.8,-97.7,497.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1641000
  custom_metrics: {}
  date: 2021-10-29_09-19-51
  done: false
  episode_len_mean: 507.01
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -50.701000000000086
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5456
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08879806961569083
          cur_lr: 5.000000000000001e-05
          entropy: 1.3789937959776983
          entropy_coeff: 0.009999999999999998
          kl: 0.014497712126558494
          policy_loss: -0.08036542154020733
          total_loss: 1.119003760152393
          vf_explained_var: 0.09958324581384659
          vf_loss: 1.211871737904019
    num_agent_steps_sampled: 1641000
    num_agent_steps_trained: 1641000
    num_steps_sampled: 1641000
    num_steps_trained: 1641000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1641,43995.5,1641000,-50.701,-20.8,-97.7,507.01


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1642000
  custom_metrics: {}
  date: 2021-10-29_09-20-05
  done: false
  episode_len_mean: 513.38
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -51.338000000000086
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5458
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.08879806961569083
          cur_lr: 5.000000000000001e-05
          entropy: 1.6655822727415297
          entropy_coeff: 0.009999999999999998
          kl: 0.03574269745587648
          policy_loss: 0.08785264111227459
          total_loss: 0.6639823834101359
          vf_explained_var: -0.04200807586312294
          vf_loss: 0.5896116942167282
    num_agent_steps_sampled: 1642000
    num_agent_steps_trained: 1642000
    num_steps_sampled: 1642000
    num_steps_trained: 1642000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1642,44009.7,1642000,-51.338,-20.8,-97.7,513.38


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1643000
  custom_metrics: {}
  date: 2021-10-29_09-20-16
  done: false
  episode_len_mean: 519.79
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -51.979000000000084
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5459
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.13319710442353624
          cur_lr: 5.000000000000001e-05
          entropy: 0.7605032205581665
          entropy_coeff: 0.009999999999999998
          kl: 0.023110407975767088
          policy_loss: 0.16907279714941978
          total_loss: 0.19588895986477534
          vf_explained_var: 0.4379989504814148
          vf_loss: 0.03134295586496592
    num_agent_steps_sampled: 1643000
    num_agent_steps_trained: 1643000
    num_steps_sampled: 1643000
    num_steps_trained: 164300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1643,44020.5,1643000,-51.979,-20.8,-97.7,519.79




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1644000
  custom_metrics: {}
  date: 2021-10-29_09-20-47
  done: false
  episode_len_mean: 522.6
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -52.26000000000009
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5460
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1997956566353044
          cur_lr: 5.000000000000001e-05
          entropy: 1.5935349212752448
          entropy_coeff: 0.009999999999999998
          kl: 0.011861814852103454
          policy_loss: -0.017153588351276186
          total_loss: 0.5068352301087644
          vf_explained_var: -0.20742636919021606
          vf_loss: 0.5375542270330091
    num_agent_steps_sampled: 1644000
    num_agent_steps_trained: 1644000
    num_steps_sampled: 1644000
    num_steps_trained: 1644000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1644,44051.4,1644000,-52.26,-20.8,-97.7,522.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1645000
  custom_metrics: {}
  date: 2021-10-29_09-21-08
  done: false
  episode_len_mean: 527.38
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -52.73800000000009
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 3
  episodes_total: 5463
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1997956566353044
          cur_lr: 5.000000000000001e-05
          entropy: 1.4950284361839294
          entropy_coeff: 0.009999999999999998
          kl: 0.00799005271472808
          policy_loss: -0.01046700088514222
          total_loss: 1.3885906077093548
          vf_explained_var: 0.07484253495931625
          vf_loss: 1.412411520547337
    num_agent_steps_sampled: 1645000
    num_agent_steps_trained: 1645000
    num_steps_sampled: 1645000
    num_steps_trained: 1645000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1645,44072.4,1645000,-52.738,-20.8,-97.7,527.38


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1646000
  custom_metrics: {}
  date: 2021-10-29_09-21-22
  done: false
  episode_len_mean: 532.68
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -53.268000000000086
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 1
  episodes_total: 5464
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1997956566353044
          cur_lr: 5.000000000000001e-05
          entropy: 0.9822123818927341
          entropy_coeff: 0.009999999999999998
          kl: 0.007930487047667563
          policy_loss: -0.06390228213535415
          total_loss: 0.6477471626467175
          vf_explained_var: -0.5362156629562378
          vf_loss: 0.7198870934545993
    num_agent_steps_sampled: 1646000
    num_agent_steps_trained: 1646000
    num_steps_sampled: 1646000
    num_steps_trained: 1646000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1646,44086.3,1646000,-53.268,-20.8,-97.7,532.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1647000
  custom_metrics: {}
  date: 2021-10-29_09-21-41
  done: false
  episode_len_mean: 538.0
  episode_media: {}
  episode_reward_max: -20.800000000000026
  episode_reward_mean: -53.8000000000001
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 3
  episodes_total: 5467
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1997956566353044
          cur_lr: 5.000000000000001e-05
          entropy: 1.6534943911764357
          entropy_coeff: 0.009999999999999998
          kl: 0.03345697303627715
          policy_loss: 0.06123991360267003
          total_loss: 0.9585871461364958
          vf_explained_var: 0.004462746903300285
          vf_loss: 0.9071976095748444
    num_agent_steps_sampled: 1647000
    num_agent_steps_trained: 1647000
    num_steps_sampled: 1647000
    num_steps_trained: 1647000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1647,44105.4,1647000,-53.8,-20.8,-97.7,538


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1648000
  custom_metrics: {}
  date: 2021-10-29_09-22-04
  done: false
  episode_len_mean: 540.63
  episode_media: {}
  episode_reward_max: -22.60000000000005
  episode_reward_mean: -54.0630000000001
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 3
  episodes_total: 5470
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29969348495295656
          cur_lr: 5.000000000000001e-05
          entropy: 1.2527447832955254
          entropy_coeff: 0.009999999999999998
          kl: 0.012210377335204232
          policy_loss: 0.0423917219042778
          total_loss: 1.5400178439087338
          vf_explained_var: -0.16790881752967834
          vf_loss: 1.506494201719761
    num_agent_steps_sampled: 1648000
    num_agent_steps_trained: 1648000
    num_steps_sampled: 1648000
    num_steps_trained: 1648000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1648,44128.2,1648000,-54.063,-22.6,-97.7,540.63


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1649000
  custom_metrics: {}
  date: 2021-10-29_09-22-19
  done: false
  episode_len_mean: 545.1
  episode_media: {}
  episode_reward_max: -22.60000000000005
  episode_reward_mean: -54.51000000000011
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5472
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29969348495295656
          cur_lr: 5.000000000000001e-05
          entropy: 1.4115886052449544
          entropy_coeff: 0.009999999999999998
          kl: 0.012182003237603094
          policy_loss: 0.038260608580377366
          total_loss: 0.7548617624574238
          vf_explained_var: -0.0765194445848465
          vf_loss: 0.7270661691307194
    num_agent_steps_sampled: 1649000
    num_agent_steps_trained: 1649000
    num_steps_sampled: 1649000
    num_steps_trained: 1649000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1649,44143.1,1649000,-54.51,-22.6,-97.7,545.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1650000
  custom_metrics: {}
  date: 2021-10-29_09-22-37
  done: false
  episode_len_mean: 547.95
  episode_media: {}
  episode_reward_max: -22.60000000000005
  episode_reward_mean: -54.795000000000115
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5474
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29969348495295656
          cur_lr: 5.000000000000001e-05
          entropy: 1.6295032766130235
          entropy_coeff: 0.009999999999999998
          kl: 0.018046739219004276
          policy_loss: -0.0852833577328258
          total_loss: 1.2605697890122731
          vf_explained_var: -0.07042454183101654
          vf_loss: 1.3567396827042102
    num_agent_steps_sampled: 1650000
    num_agent_steps_trained: 1650000
    num_steps_sampled: 1650000
    num_steps_trained: 1650000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1650,44161.6,1650000,-54.795,-22.6,-97.7,547.95


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1651000
  custom_metrics: {}
  date: 2021-10-29_09-22-59
  done: false
  episode_len_mean: 544.91
  episode_media: {}
  episode_reward_max: -22.60000000000005
  episode_reward_mean: -54.49100000000012
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 3
  episodes_total: 5477
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29969348495295656
          cur_lr: 5.000000000000001e-05
          entropy: 1.1961057172881233
          entropy_coeff: 0.009999999999999998
          kl: 0.007152021092688996
          policy_loss: 0.02554077704747518
          total_loss: 1.575355953971545
          vf_explained_var: -0.3325822949409485
          vf_loss: 1.559632835123274
    num_agent_steps_sampled: 1651000
    num_agent_steps_trained: 1651000
    num_steps_sampled: 1651000
    num_steps_trained: 1651000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1651,44183.3,1651000,-54.491,-22.6,-97.7,544.91


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1652000
  custom_metrics: {}
  date: 2021-10-29_09-23-16
  done: false
  episode_len_mean: 546.62
  episode_media: {}
  episode_reward_max: -23.10000000000006
  episode_reward_mean: -54.66200000000013
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5479
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29969348495295656
          cur_lr: 5.000000000000001e-05
          entropy: 1.9913396610154046
          entropy_coeff: 0.009999999999999998
          kl: 0.01594335348386843
          policy_loss: 0.09450109923879306
          total_loss: 0.7456295806500647
          vf_explained_var: 0.0979480966925621
          vf_loss: 0.6662637644964787
    num_agent_steps_sampled: 1652000
    num_agent_steps_trained: 1652000
    num_steps_sampled: 1652000
    num_steps_trained: 1652000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1652,44199.8,1652000,-54.662,-23.1,-97.7,546.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1653000
  custom_metrics: {}
  date: 2021-10-29_09-23-34
  done: false
  episode_len_mean: 535.36
  episode_media: {}
  episode_reward_max: -23.10000000000006
  episode_reward_mean: -53.53600000000016
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5481
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29969348495295656
          cur_lr: 5.000000000000001e-05
          entropy: 1.205383653110928
          entropy_coeff: 0.009999999999999998
          kl: 0.009007006696314612
          policy_loss: -0.082238513065709
          total_loss: 1.4097364041540357
          vf_explained_var: -0.27408185601234436
          vf_loss: 1.5013294200102487
    num_agent_steps_sampled: 1653000
    num_agent_steps_trained: 1653000
    num_steps_sampled: 1653000
    num_steps_trained: 1653000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1653,44218.5,1653000,-53.536,-23.1,-97.7,535.36


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1654000
  custom_metrics: {}
  date: 2021-10-29_09-23-54
  done: false
  episode_len_mean: 536.18
  episode_media: {}
  episode_reward_max: -23.10000000000006
  episode_reward_mean: -53.618000000000166
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5483
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29969348495295656
          cur_lr: 5.000000000000001e-05
          entropy: 1.2147500640816158
          entropy_coeff: 0.009999999999999998
          kl: 0.013657308951902298
          policy_loss: -0.11749518447452122
          total_loss: 1.0093874384959538
          vf_explained_var: -0.22686922550201416
          vf_loss: 1.1349371069835292
    num_agent_steps_sampled: 1654000
    num_agent_steps_trained: 1654000
    num_steps_sampled: 1654000
    num_steps_trained: 165400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1654,44238,1654000,-53.618,-23.1,-97.7,536.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1655000
  custom_metrics: {}
  date: 2021-10-29_09-24-11
  done: false
  episode_len_mean: 539.81
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -53.98100000000018
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5485
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29969348495295656
          cur_lr: 5.000000000000001e-05
          entropy: 1.8086514658398098
          entropy_coeff: 0.009999999999999998
          kl: 0.013418296834894426
          policy_loss: -0.11521411273214552
          total_loss: 1.001590261194441
          vf_explained_var: 0.1643306463956833
          vf_loss: 1.1308694991386599
    num_agent_steps_sampled: 1655000
    num_agent_steps_trained: 1655000
    num_steps_sampled: 1655000
    num_steps_trained: 1655000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1655,44255.1,1655000,-53.981,-24.8,-97.7,539.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1656000
  custom_metrics: {}
  date: 2021-10-29_09-24-28
  done: false
  episode_len_mean: 541.76
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -54.17600000000016
  episode_reward_min: -97.69999999999872
  episodes_this_iter: 2
  episodes_total: 5487
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29969348495295656
          cur_lr: 5.000000000000001e-05
          entropy: 1.137386253145006
          entropy_coeff: 0.009999999999999998
          kl: 0.014453282370323745
          policy_loss: -0.12399854544136259
          total_loss: 0.8772187949882613
          vf_explained_var: 0.11933356523513794
          vf_loss: 1.0082596444421343
    num_agent_steps_sampled: 1656000
    num_agent_steps_trained: 1656000
    num_steps_sampled: 1656000
    num_steps_trained: 1656000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1656,44271.7,1656000,-54.176,-24.8,-97.7,541.76




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1657000
  custom_metrics: {}
  date: 2021-10-29_09-25-07
  done: false
  episode_len_mean: 536.44
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -53.644000000000176
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5490
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.29969348495295656
          cur_lr: 5.000000000000001e-05
          entropy: 1.5488423970010545
          entropy_coeff: 0.009999999999999998
          kl: 0.03501299446107748
          policy_loss: 0.03369680659638511
          total_loss: 1.4445669677522448
          vf_explained_var: 0.3001907467842102
          vf_loss: 1.4158654011785985
    num_agent_steps_sampled: 1657000
    num_agent_steps_trained: 1657000
    num_steps_sampled: 1657000
    num_steps_trained: 1657000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1657,44311.5,1657000,-53.644,-24.8,-95.5,536.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1658000
  custom_metrics: {}
  date: 2021-10-29_09-25-27
  done: false
  episode_len_mean: 525.29
  episode_media: {}
  episode_reward_max: -24.800000000000082
  episode_reward_mean: -52.5290000000002
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5493
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4495402274294351
          cur_lr: 5.000000000000001e-05
          entropy: 1.0266425662570529
          entropy_coeff: 0.009999999999999998
          kl: 0.008865661585978736
          policy_loss: 0.05598650260104073
          total_loss: 1.363353164328469
          vf_explained_var: -0.24769847095012665
          vf_loss: 1.313647595544656
    num_agent_steps_sampled: 1658000
    num_agent_steps_trained: 1658000
    num_steps_sampled: 1658000
    num_steps_trained: 1658000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1658,44330.7,1658000,-52.529,-24.8,-95.5,525.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1659000
  custom_metrics: {}
  date: 2021-10-29_09-25-41
  done: false
  episode_len_mean: 528.7
  episode_media: {}
  episode_reward_max: -26.400000000000105
  episode_reward_mean: -52.87000000000021
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 1
  episodes_total: 5494
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4495402274294351
          cur_lr: 5.000000000000001e-05
          entropy: 1.034579528040356
          entropy_coeff: 0.009999999999999998
          kl: 0.007338640806900148
          policy_loss: -0.09196900067230065
          total_loss: 0.6463860420717134
          vf_explained_var: 0.020240463316440582
          vf_loss: 0.7454018223616812
    num_agent_steps_sampled: 1659000
    num_agent_steps_trained: 1659000
    num_steps_sampled: 1659000
    num_steps_trained: 1659000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1659,44345.4,1659000,-52.87,-26.4,-95.5,528.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1660000
  custom_metrics: {}
  date: 2021-10-29_09-26-04
  done: false
  episode_len_mean: 525.02
  episode_media: {}
  episode_reward_max: -26.400000000000105
  episode_reward_mean: -52.50200000000022
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5497
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4495402274294351
          cur_lr: 5.000000000000001e-05
          entropy: 1.802453104654948
          entropy_coeff: 0.009999999999999998
          kl: 0.014267458216350093
          policy_loss: 0.02865554980105824
          total_loss: 0.9788152552313275
          vf_explained_var: 0.5001852512359619
          vf_loss: 0.9617704446117084
    num_agent_steps_sampled: 1660000
    num_agent_steps_trained: 1660000
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1660,44368.1,1660000,-52.502,-26.4,-95.5,525.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1661000
  custom_metrics: {}
  date: 2021-10-29_09-26-27
  done: false
  episode_len_mean: 515.68
  episode_media: {}
  episode_reward_max: -26.400000000000105
  episode_reward_mean: -51.56800000000022
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5500
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4495402274294351
          cur_lr: 5.000000000000001e-05
          entropy: 1.7103486829333836
          entropy_coeff: 0.009999999999999998
          kl: 0.02696632944041717
          policy_loss: 0.009925362447069751
          total_loss: 0.7767021867964002
          vf_explained_var: 0.27015623450279236
          vf_loss: 0.7717578566736645
    num_agent_steps_sampled: 1661000
    num_agent_steps_trained: 1661000
    num_steps_sampled: 1661000
    num_steps_trained: 1661000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1661,44391,1661000,-51.568,-26.4,-95.5,515.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1662000
  custom_metrics: {}
  date: 2021-10-29_09-26-49
  done: false
  episode_len_mean: 511.39
  episode_media: {}
  episode_reward_max: -26.400000000000105
  episode_reward_mean: -51.13900000000022
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5503
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6743103411441522
          cur_lr: 5.000000000000001e-05
          entropy: 1.6273668474621243
          entropy_coeff: 0.009999999999999998
          kl: 0.007482313870925989
          policy_loss: -0.004804713154832522
          total_loss: 0.9862374775939518
          vf_explained_var: 0.2389286309480667
          vf_loss: 1.0022704654269747
    num_agent_steps_sampled: 1662000
    num_agent_steps_trained: 1662000
    num_steps_sampled: 1662000
    num_steps_trained: 1662000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1662,44412.6,1662000,-51.139,-26.4,-95.5,511.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1663000
  custom_metrics: {}
  date: 2021-10-29_09-27-06
  done: false
  episode_len_mean: 512.65
  episode_media: {}
  episode_reward_max: -26.400000000000105
  episode_reward_mean: -51.265000000000235
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 2
  episodes_total: 5505
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6743103411441522
          cur_lr: 5.000000000000001e-05
          entropy: 0.8469207915994856
          entropy_coeff: 0.009999999999999998
          kl: 0.01002948358928754
          policy_loss: -0.06129255112674501
          total_loss: 1.029951441122426
          vf_explained_var: 0.014296993613243103
          vf_loss: 1.092950219495429
    num_agent_steps_sampled: 1663000
    num_agent_steps_trained: 1663000
    num_steps_sampled: 1663000
    num_steps_trained: 1663000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1663,44429.9,1663000,-51.265,-26.4,-95.5,512.65


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1664000
  custom_metrics: {}
  date: 2021-10-29_09-27-21
  done: false
  episode_len_mean: 515.62
  episode_media: {}
  episode_reward_max: -26.400000000000105
  episode_reward_mean: -51.56200000000023
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 2
  episodes_total: 5507
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6743103411441522
          cur_lr: 5.000000000000001e-05
          entropy: 0.8214091532760196
          entropy_coeff: 0.009999999999999998
          kl: 0.006693860383963962
          policy_loss: -0.08484432995319366
          total_loss: 1.2386486076646381
          vf_explained_var: -0.28081098198890686
          vf_loss: 1.3271933048963547
    num_agent_steps_sampled: 1664000
    num_agent_steps_trained: 1664000
    num_steps_sampled: 1664000
    num_steps_trained: 1664000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1664,44445.2,1664000,-51.562,-26.4,-95.5,515.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1665000
  custom_metrics: {}
  date: 2021-10-29_09-27-39
  done: false
  episode_len_mean: 518.09
  episode_media: {}
  episode_reward_max: -26.400000000000105
  episode_reward_mean: -51.80900000000023
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 2
  episodes_total: 5509
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6743103411441522
          cur_lr: 5.000000000000001e-05
          entropy: 0.827462496360143
          entropy_coeff: 0.009999999999999998
          kl: 0.004011213196314382
          policy_loss: -0.09067406588130528
          total_loss: 1.1900216824478573
          vf_explained_var: -0.21464940905570984
          vf_loss: 1.2862655544446575
    num_agent_steps_sampled: 1665000
    num_agent_steps_trained: 1665000
    num_steps_sampled: 1665000
    num_steps_trained: 1665000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1665,44463.3,1665000,-51.809,-26.4,-95.5,518.09


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1666000
  custom_metrics: {}
  date: 2021-10-29_09-28-00
  done: false
  episode_len_mean: 514.36
  episode_media: {}
  episode_reward_max: -26.400000000000105
  episode_reward_mean: -51.43600000000022
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5512
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3371551705720761
          cur_lr: 5.000000000000001e-05
          entropy: 0.8006709860430823
          entropy_coeff: 0.009999999999999998
          kl: 0.003733125267089892
          policy_loss: 0.03551631205611759
          total_loss: 1.1324822972218196
          vf_explained_var: -0.038838211447000504
          vf_loss: 1.1037140518426896
    num_agent_steps_sampled: 1666000
    num_agent_steps_trained: 1666000
    num_steps_sampled: 1666000
    num_steps_trained: 1666000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1666,44483.4,1666000,-51.436,-26.4,-95.5,514.36


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1667000
  custom_metrics: {}
  date: 2021-10-29_09-28-17
  done: false
  episode_len_mean: 513.5
  episode_media: {}
  episode_reward_max: -26.400000000000105
  episode_reward_mean: -51.35000000000021
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 2
  episodes_total: 5514
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16857758528603806
          cur_lr: 5.000000000000001e-05
          entropy: 0.8103653026951684
          entropy_coeff: 0.009999999999999998
          kl: 0.010822044852693013
          policy_loss: -0.0805758535861969
          total_loss: 1.2728278911776012
          vf_explained_var: -0.3008013665676117
          vf_loss: 1.359683036721415
    num_agent_steps_sampled: 1667000
    num_agent_steps_trained: 1667000
    num_steps_sampled: 1667000
    num_steps_trained: 1667000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1667,44501.2,1667000,-51.35,-26.4,-95.5,513.5


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1668000
  custom_metrics: {}
  date: 2021-10-29_09-28-35
  done: false
  episode_len_mean: 511.62
  episode_media: {}
  episode_reward_max: -26.400000000000105
  episode_reward_mean: -51.16200000000022
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 2
  episodes_total: 5516
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.16857758528603806
          cur_lr: 5.000000000000001e-05
          entropy: 1.4427013205157386
          entropy_coeff: 0.009999999999999998
          kl: 0.022209162973841973
          policy_loss: -0.01569145437743929
          total_loss: 0.529134309457408
          vf_explained_var: 0.343007355928421
          vf_loss: 0.5555088067013356
    num_agent_steps_sampled: 1668000
    num_agent_steps_trained: 1668000
    num_steps_sampled: 1668000
    num_steps_trained: 1668000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1668,44519.2,1668000,-51.162,-26.4,-95.5,511.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1669000
  custom_metrics: {}
  date: 2021-10-29_09-28-56
  done: false
  episode_len_mean: 511.99
  episode_media: {}
  episode_reward_max: -25.900000000000098
  episode_reward_mean: -51.19900000000023
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5519
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2528663779290571
          cur_lr: 5.000000000000001e-05
          entropy: 0.854962886042065
          entropy_coeff: 0.009999999999999998
          kl: 0.011491973618134968
          policy_loss: 0.016722587247689564
          total_loss: 1.4149905171659258
          vf_explained_var: -0.29069042205810547
          vf_loss: 1.4039116153286564
    num_agent_steps_sampled: 1669000
    num_agent_steps_trained: 1669000
    num_steps_sampled: 1669000
    num_steps_trained: 1669000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1669,44539.7,1669000,-51.199,-25.9,-95.5,511.99




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1670000
  custom_metrics: {}
  date: 2021-10-29_09-29-38
  done: false
  episode_len_mean: 505.06
  episode_media: {}
  episode_reward_max: -25.30000000000009
  episode_reward_mean: -50.50600000000023
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5522
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2528663779290571
          cur_lr: 5.000000000000001e-05
          entropy: 1.1292237599690755
          entropy_coeff: 0.009999999999999998
          kl: 0.01198424508854564
          policy_loss: -0.030891084919373195
          total_loss: 1.207672013839086
          vf_explained_var: 0.08443830162286758
          vf_loss: 1.2468249149620534
    num_agent_steps_sampled: 1670000
    num_agent_steps_trained: 1670000
    num_steps_sampled: 1670000
    num_steps_trained: 1670000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1670,44581.3,1670000,-50.506,-25.3,-95.5,505.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1671000
  custom_metrics: {}
  date: 2021-10-29_09-30-01
  done: false
  episode_len_mean: 500.18
  episode_media: {}
  episode_reward_max: -25.30000000000009
  episode_reward_mean: -50.018000000000214
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5525
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2528663779290571
          cur_lr: 5.000000000000001e-05
          entropy: 1.252067401011785
          entropy_coeff: 0.009999999999999998
          kl: 0.018518821072692995
          policy_loss: 0.009399919708569845
          total_loss: 0.9689213378561867
          vf_explained_var: 0.274465411901474
          vf_loss: 0.9673593140310711
    num_agent_steps_sampled: 1671000
    num_agent_steps_trained: 1671000
    num_steps_sampled: 1671000
    num_steps_trained: 1671000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1671,44604.5,1671000,-50.018,-25.3,-95.5,500.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1672000
  custom_metrics: {}
  date: 2021-10-29_09-30-20
  done: false
  episode_len_mean: 499.97
  episode_media: {}
  episode_reward_max: -25.30000000000009
  episode_reward_mean: -49.99700000000021
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 2
  episodes_total: 5527
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2528663779290571
          cur_lr: 5.000000000000001e-05
          entropy: 1.212016729513804
          entropy_coeff: 0.009999999999999998
          kl: 0.02353695739145753
          policy_loss: -0.10254316346512901
          total_loss: 1.2557980948024325
          vf_explained_var: -0.3214133679866791
          vf_loss: 1.3645097153054344
    num_agent_steps_sampled: 1672000
    num_agent_steps_trained: 1672000
    num_steps_sampled: 1672000
    num_steps_trained: 1672000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1672,44623.7,1672000,-49.997,-25.3,-95.5,499.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1673000
  custom_metrics: {}
  date: 2021-10-29_09-30-41
  done: false
  episode_len_mean: 497.02
  episode_media: {}
  episode_reward_max: -25.30000000000009
  episode_reward_mean: -49.702000000000226
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5530
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.37929956689358574
          cur_lr: 5.000000000000001e-05
          entropy: 1.0770523375935024
          entropy_coeff: 0.009999999999999998
          kl: 0.01652425763623234
          policy_loss: -0.10175725685225592
          total_loss: 1.196597977148162
          vf_explained_var: 0.39209428429603577
          vf_loss: 1.3028581202030183
    num_agent_steps_sampled: 1673000
    num_agent_steps_trained: 1673000
    num_steps_sampled: 1673000
    num_steps_trained: 1673000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1673,44645.2,1673000,-49.702,-25.3,-95.5,497.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1674000
  custom_metrics: {}
  date: 2021-10-29_09-31-03
  done: false
  episode_len_mean: 494.58
  episode_media: {}
  episode_reward_max: -25.30000000000009
  episode_reward_mean: -49.45800000000022
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5533
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.37929956689358574
          cur_lr: 5.000000000000001e-05
          entropy: 1.1852653529908923
          entropy_coeff: 0.009999999999999998
          kl: 0.014933441972350497
          policy_loss: 0.06400963796509637
          total_loss: 0.7666023340490129
          vf_explained_var: 0.190481036901474
          vf_loss: 0.7087810918274853
    num_agent_steps_sampled: 1674000
    num_agent_steps_trained: 1674000
    num_steps_sampled: 1674000
    num_steps_trained: 1674000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1674,44666.7,1674000,-49.458,-25.3,-95.5,494.58


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1675000
  custom_metrics: {}
  date: 2021-10-29_09-31-24
  done: false
  episode_len_mean: 487.74
  episode_media: {}
  episode_reward_max: -25.30000000000009
  episode_reward_mean: -48.77400000000022
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5536
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.37929956689358574
          cur_lr: 5.000000000000001e-05
          entropy: 1.0030286974377103
          entropy_coeff: 0.009999999999999998
          kl: 0.005443531631096398
          policy_loss: 0.02909910066260232
          total_loss: 1.2501765767733255
          vf_explained_var: 0.12496928870677948
          vf_loss: 1.2290430234538183
    num_agent_steps_sampled: 1675000
    num_agent_steps_trained: 1675000
    num_steps_sampled: 1675000
    num_steps_trained: 1675000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1675,44687.6,1675000,-48.774,-25.3,-95.5,487.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1676000
  custom_metrics: {}
  date: 2021-10-29_09-31-47
  done: false
  episode_len_mean: 473.36
  episode_media: {}
  episode_reward_max: -25.30000000000009
  episode_reward_mean: -47.33600000000023
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5539
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.37929956689358574
          cur_lr: 5.000000000000001e-05
          entropy: 1.7324101448059082
          entropy_coeff: 0.009999999999999998
          kl: 0.029378274694792594
          policy_loss: 0.11326592332786983
          total_loss: 1.069200141562356
          vf_explained_var: 0.41938352584838867
          vf_loss: 0.9621151526769002
    num_agent_steps_sampled: 1676000
    num_agent_steps_trained: 1676000
    num_steps_sampled: 1676000
    num_steps_trained: 1676000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1676,44710.5,1676000,-47.336,-25.3,-95.5,473.36


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1677000
  custom_metrics: {}
  date: 2021-10-29_09-32-04
  done: false
  episode_len_mean: 470.68
  episode_media: {}
  episode_reward_max: -25.30000000000009
  episode_reward_mean: -47.06800000000023
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 2
  episodes_total: 5541
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5689493503403785
          cur_lr: 5.000000000000001e-05
          entropy: 1.0564024892118242
          entropy_coeff: 0.009999999999999998
          kl: 0.00554103736720069
          policy_loss: 0.0901024834977256
          total_loss: 0.7458753320905898
          vf_explained_var: -0.6305976510047913
          vf_loss: 0.6631842986163166
    num_agent_steps_sampled: 1677000
    num_agent_steps_trained: 1677000
    num_steps_sampled: 1677000
    num_steps_trained: 1677000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1677,44727.2,1677000,-47.068,-25.3,-95.5,470.68


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1678000
  custom_metrics: {}
  date: 2021-10-29_09-32-23
  done: false
  episode_len_mean: 465.18
  episode_media: {}
  episode_reward_max: -25.30000000000009
  episode_reward_mean: -46.51800000000022
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 2
  episodes_total: 5543
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5689493503403785
          cur_lr: 5.000000000000001e-05
          entropy: 1.3245793395572238
          entropy_coeff: 0.009999999999999998
          kl: 0.009406130841524096
          policy_loss: -0.07006658158368534
          total_loss: 0.8979642440875372
          vf_explained_var: -0.003550578374415636
          vf_loss: 0.975925008373128
    num_agent_steps_sampled: 1678000
    num_agent_steps_trained: 1678000
    num_steps_sampled: 1678000
    num_steps_trained: 1678000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1678,44746.5,1678000,-46.518,-25.3,-95.5,465.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1679000
  custom_metrics: {}
  date: 2021-10-29_09-32-44
  done: false
  episode_len_mean: 460.67
  episode_media: {}
  episode_reward_max: -25.30000000000009
  episode_reward_mean: -46.06700000000024
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5546
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5689493503403785
          cur_lr: 5.000000000000001e-05
          entropy: 1.3187036726209853
          entropy_coeff: 0.009999999999999998
          kl: 0.015675641037750567
          policy_loss: 0.08943124777740902
          total_loss: 1.0513750533262889
          vf_explained_var: 0.4144313931465149
          vf_loss: 0.9662121964825524
    num_agent_steps_sampled: 1679000
    num_agent_steps_trained: 1679000
    num_steps_sampled: 1679000
    num_steps_trained: 1679000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1679,44767.4,1679000,-46.067,-25.3,-95.5,460.67


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1680000
  custom_metrics: {}
  date: 2021-10-29_09-33-05
  done: false
  episode_len_mean: 450.65
  episode_media: {}
  episode_reward_max: -24.60000000000008
  episode_reward_mean: -45.065000000000246
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 2
  episodes_total: 5548
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5689493503403785
          cur_lr: 5.000000000000001e-05
          entropy: 1.1548116114404468
          entropy_coeff: 0.009999999999999998
          kl: 0.010054566506817825
          policy_loss: -0.06266924854781893
          total_loss: 0.9464615816871326
          vf_explained_var: 0.1782069057226181
          vf_loss: 1.0149583978785408
    num_agent_steps_sampled: 1680000
    num_agent_steps_trained: 1680000
    num_steps_sampled: 1680000
    num_steps_trained: 1680000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1680,44788.4,1680000,-45.065,-24.6,-95.5,450.65




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1681000
  custom_metrics: {}
  date: 2021-10-29_09-33-49
  done: false
  episode_len_mean: 429.24
  episode_media: {}
  episode_reward_max: -23.800000000000068
  episode_reward_mean: -42.92400000000028
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 4
  episodes_total: 5552
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5689493503403785
          cur_lr: 5.000000000000001e-05
          entropy: 1.2442568600177766
          entropy_coeff: 0.009999999999999998
          kl: 0.01901172516635717
          policy_loss: -0.010510638025071887
          total_loss: 1.6830129504203797
          vf_explained_var: 0.1303180754184723
          vf_loss: 1.695149454805586
    num_agent_steps_sampled: 1681000
    num_agent_steps_trained: 1681000
    num_steps_sampled: 1681000
    num_steps_trained: 1681000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1681,44832.3,1681000,-42.924,-23.8,-95.5,429.24


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1682000
  custom_metrics: {}
  date: 2021-10-29_09-34-12
  done: false
  episode_len_mean: 416.84
  episode_media: {}
  episode_reward_max: -23.800000000000068
  episode_reward_mean: -41.68400000000029
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5555
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5689493503403785
          cur_lr: 5.000000000000001e-05
          entropy: 1.0373747203085157
          entropy_coeff: 0.009999999999999998
          kl: 0.015907269390170793
          policy_loss: 0.028158890869882373
          total_loss: 1.1631830566459231
          vf_explained_var: 0.12325018644332886
          vf_loss: 1.1363474745717315
    num_agent_steps_sampled: 1682000
    num_agent_steps_trained: 1682000
    num_steps_sampled: 1682000
    num_steps_trained: 1682000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1682,44856,1682000,-41.684,-23.8,-95.5,416.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1683000
  custom_metrics: {}
  date: 2021-10-29_09-34-38
  done: false
  episode_len_mean: 408.52
  episode_media: {}
  episode_reward_max: -23.800000000000068
  episode_reward_mean: -40.852000000000274
  episode_reward_min: -95.49999999999885
  episodes_this_iter: 3
  episodes_total: 5558
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5689493503403785
          cur_lr: 5.000000000000001e-05
          entropy: 1.5776332020759583
          entropy_coeff: 0.009999999999999998
          kl: 0.008399801805044089
          policy_loss: 0.0381706479522917
          total_loss: 0.9503810766670439
          vf_explained_var: -0.15336847305297852
          vf_loss: 0.9232076867587037
    num_agent_steps_sampled: 1683000
    num_agent_steps_trained: 1683000
    num_steps_sampled: 1683000
    num_steps_trained: 1683000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1683,44881.6,1683000,-40.852,-23.8,-95.5,408.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1684000
  custom_metrics: {}
  date: 2021-10-29_09-35-04
  done: false
  episode_len_mean: 395.29
  episode_media: {}
  episode_reward_max: -23.800000000000068
  episode_reward_mean: -39.52900000000028
  episode_reward_min: -80.99999999999967
  episodes_this_iter: 4
  episodes_total: 5562
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5689493503403785
          cur_lr: 5.000000000000001e-05
          entropy: 1.1938684847619798
          entropy_coeff: 0.009999999999999998
          kl: 0.007933240925965713
          policy_loss: 0.04787938528590732
          total_loss: 1.3253444486194186
          vf_explained_var: 0.2460537850856781
          vf_loss: 1.2848901430765787
    num_agent_steps_sampled: 1684000
    num_agent_steps_trained: 1684000
    num_steps_sampled: 1684000
    num_steps_trained: 1684000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1684,44907.5,1684000,-39.529,-23.8,-81,395.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1685000
  custom_metrics: {}
  date: 2021-10-29_09-35-28
  done: false
  episode_len_mean: 388.23
  episode_media: {}
  episode_reward_max: -23.10000000000006
  episode_reward_mean: -38.82300000000028
  episode_reward_min: -62.80000000000062
  episodes_this_iter: 3
  episodes_total: 5565
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5689493503403785
          cur_lr: 5.000000000000001e-05
          entropy: 1.4494603236516317
          entropy_coeff: 0.009999999999999998
          kl: 0.016389278280854094
          policy_loss: 0.05622136857774523
          total_loss: 1.0128447400199043
          vf_explained_var: 0.31412839889526367
          vf_loss: 0.9617932917343246
    num_agent_steps_sampled: 1685000
    num_agent_steps_trained: 1685000
    num_steps_sampled: 1685000
    num_steps_trained: 1685000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1685,44931.6,1685000,-38.823,-23.1,-62.8,388.23


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1686000
  custom_metrics: {}
  date: 2021-10-29_09-35-54
  done: false
  episode_len_mean: 386.27
  episode_media: {}
  episode_reward_max: -23.10000000000006
  episode_reward_mean: -38.62700000000027
  episode_reward_min: -62.80000000000062
  episodes_this_iter: 3
  episodes_total: 5568
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5689493503403785
          cur_lr: 5.000000000000001e-05
          entropy: 1.3553595503171285
          entropy_coeff: 0.009999999999999998
          kl: 0.013405186363402934
          policy_loss: 0.0723954975605011
          total_loss: 1.0399209923214383
          vf_explained_var: 0.23412960767745972
          vf_loss: 0.9734522170490689
    num_agent_steps_sampled: 1686000
    num_agent_steps_trained: 1686000
    num_steps_sampled: 1686000
    num_steps_trained: 1686000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1686,44957.3,1686000,-38.627,-23.1,-62.8,386.27


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1687000
  custom_metrics: {}
  date: 2021-10-29_09-36-19
  done: false
  episode_len_mean: 379.17
  episode_media: {}
  episode_reward_max: -23.10000000000006
  episode_reward_mean: -37.917000000000264
  episode_reward_min: -62.80000000000062
  episodes_this_iter: 4
  episodes_total: 5572
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5689493503403785
          cur_lr: 5.000000000000001e-05
          entropy: 1.4216647028923035
          entropy_coeff: 0.009999999999999998
          kl: 0.0058133712442522915
          policy_loss: 0.020131958607170316
          total_loss: 1.4048162208663093
          vf_explained_var: 0.13863703608512878
          vf_loss: 1.3955934087435404
    num_agent_steps_sampled: 1687000
    num_agent_steps_trained: 1687000
    num_steps_sampled: 1687000
    num_steps_trained: 1687000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1687,44982.6,1687000,-37.917,-23.1,-62.8,379.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1688000
  custom_metrics: {}
  date: 2021-10-29_09-36-42
  done: false
  episode_len_mean: 375.74
  episode_media: {}
  episode_reward_max: -23.10000000000006
  episode_reward_mean: -37.57400000000027
  episode_reward_min: -62.80000000000062
  episodes_this_iter: 3
  episodes_total: 5575
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5689493503403785
          cur_lr: 5.000000000000001e-05
          entropy: 1.588762672742208
          entropy_coeff: 0.009999999999999998
          kl: 0.008433963729981444
          policy_loss: 0.06234803133540683
          total_loss: 1.0469906803634432
          vf_explained_var: 0.08763568848371506
          vf_loss: 0.9957317844861083
    num_agent_steps_sampled: 1688000
    num_agent_steps_trained: 1688000
    num_steps_sampled: 1688000
    num_steps_trained: 1688000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1688,45005.6,1688000,-37.574,-23.1,-62.8,375.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1689000
  custom_metrics: {}
  date: 2021-10-29_09-37-09
  done: false
  episode_len_mean: 373.23
  episode_media: {}
  episode_reward_max: -23.10000000000006
  episode_reward_mean: -37.32300000000026
  episode_reward_min: -62.80000000000062
  episodes_this_iter: 3
  episodes_total: 5578
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5689493503403785
          cur_lr: 5.000000000000001e-05
          entropy: 0.8132434705893199
          entropy_coeff: 0.009999999999999998
          kl: 0.004238736497302398
          policy_loss: -0.015516041219234467
          total_loss: 1.0686023894283507
          vf_explained_var: 0.3046945333480835
          vf_loss: 1.0898392276631461
    num_agent_steps_sampled: 1689000
    num_agent_steps_trained: 1689000
    num_steps_sampled: 1689000
    num_steps_trained: 1689000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1689,45032.4,1689000,-37.323,-23.1,-62.8,373.23




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1690000
  custom_metrics: {}
  date: 2021-10-29_09-37-54
  done: false
  episode_len_mean: 366.47
  episode_media: {}
  episode_reward_max: -23.10000000000006
  episode_reward_mean: -36.647000000000254
  episode_reward_min: -62.80000000000062
  episodes_this_iter: 4
  episodes_total: 5582
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28447467517018926
          cur_lr: 5.000000000000001e-05
          entropy: 1.3242938809924656
          entropy_coeff: 0.009999999999999998
          kl: 0.010116343717434795
          policy_loss: 0.010063544164101284
          total_loss: 1.2634389042854308
          vf_explained_var: 0.2055598348379135
          vf_loss: 1.2637404600779216
    num_agent_steps_sampled: 1690000
    num_agent_steps_trained: 1690000
    num_steps_sampled: 1690000
    num_steps_trained: 1690000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1690,45077.1,1690000,-36.647,-23.1,-62.8,366.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1691000
  custom_metrics: {}
  date: 2021-10-29_09-38-23
  done: false
  episode_len_mean: 359.31
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -35.931000000000246
  episode_reward_min: -62.80000000000062
  episodes_this_iter: 4
  episodes_total: 5586
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28447467517018926
          cur_lr: 5.000000000000001e-05
          entropy: 0.979273482826021
          entropy_coeff: 0.009999999999999998
          kl: 0.01773811613665209
          policy_loss: 0.02325287875201967
          total_loss: 1.2473569413026173
          vf_explained_var: 0.22734808921813965
          vf_loss: 1.2288507488038805
    num_agent_steps_sampled: 1691000
    num_agent_steps_trained: 1691000
    num_steps_sampled: 1691000
    num_steps_trained: 1691000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1691,45106.4,1691000,-35.931,-23,-62.8,359.31


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1692000
  custom_metrics: {}
  date: 2021-10-29_09-38-49
  done: false
  episode_len_mean: 354.88
  episode_media: {}
  episode_reward_max: -23.000000000000057
  episode_reward_mean: -35.48800000000024
  episode_reward_min: -58.90000000000057
  episodes_this_iter: 3
  episodes_total: 5589
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28447467517018926
          cur_lr: 5.000000000000001e-05
          entropy: 1.1563325769371458
          entropy_coeff: 0.009999999999999998
          kl: 0.010112045279443609
          policy_loss: 0.06337863521443474
          total_loss: 0.7495168911086189
          vf_explained_var: 0.1969495564699173
          vf_loss: 0.6948249492380354
    num_agent_steps_sampled: 1692000
    num_agent_steps_trained: 1692000
    num_steps_sampled: 1692000
    num_steps_trained: 1692000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1692,45132.4,1692000,-35.488,-23,-58.9,354.88


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1693000
  custom_metrics: {}
  date: 2021-10-29_09-39-18
  done: false
  episode_len_mean: 349.47
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -34.94700000000022
  episode_reward_min: -58.90000000000057
  episodes_this_iter: 4
  episodes_total: 5593
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28447467517018926
          cur_lr: 5.000000000000001e-05
          entropy: 0.7348271038797166
          entropy_coeff: 0.009999999999999998
          kl: 0.005827166159015502
          policy_loss: 0.022135455740822686
          total_loss: 1.2728144513236153
          vf_explained_var: 0.1845388412475586
          vf_loss: 1.2563695788383484
    num_agent_steps_sampled: 1693000
    num_agent_steps_trained: 1693000
    num_steps_sampled: 1693000
    num_steps_trained: 1693000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1693,45161.6,1693000,-34.947,-22.8,-58.9,349.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1694000
  custom_metrics: {}
  date: 2021-10-29_09-39-44
  done: false
  episode_len_mean: 342.13
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -34.213000000000214
  episode_reward_min: -55.300000000000516
  episodes_this_iter: 3
  episodes_total: 5596
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28447467517018926
          cur_lr: 5.000000000000001e-05
          entropy: 1.0594531887107426
          entropy_coeff: 0.009999999999999998
          kl: 0.0068022186516233695
          policy_loss: -0.0944860910375913
          total_loss: 1.1002135826481714
          vf_explained_var: 0.30899155139923096
          vf_loss: 1.2033591475751666
    num_agent_steps_sampled: 1694000
    num_agent_steps_trained: 1694000
    num_steps_sampled: 1694000
    num_steps_trained: 16940

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1694,45187.1,1694000,-34.213,-22.8,-55.3,342.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1695000
  custom_metrics: {}
  date: 2021-10-29_09-40-08
  done: false
  episode_len_mean: 342.0
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -34.200000000000216
  episode_reward_min: -55.300000000000516
  episodes_this_iter: 4
  episodes_total: 5600
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28447467517018926
          cur_lr: 5.000000000000001e-05
          entropy: 1.1278070744540956
          entropy_coeff: 0.009999999999999998
          kl: 0.010233332016278338
          policy_loss: 0.0033006726453701654
          total_loss: 1.539984279208713
          vf_explained_var: 0.169271782040596
          vf_loss: 1.5450505495071412
    num_agent_steps_sampled: 1695000
    num_agent_steps_trained: 1695000
    num_steps_sampled: 1695000
    num_steps_trained: 1695000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1695,45211.8,1695000,-34.2,-22.8,-55.3,342


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1696000
  custom_metrics: {}
  date: 2021-10-29_09-40-35
  done: false
  episode_len_mean: 340.6
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -34.060000000000215
  episode_reward_min: -55.300000000000516
  episodes_this_iter: 3
  episodes_total: 5603
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.28447467517018926
          cur_lr: 5.000000000000001e-05
          entropy: 1.3221655488014221
          entropy_coeff: 0.009999999999999998
          kl: 0.022887614168749654
          policy_loss: 0.05591324054532581
          total_loss: 1.0253318399190903
          vf_explained_var: -0.14673206210136414
          vf_loss: 0.9761293144689666
    num_agent_steps_sampled: 1696000
    num_agent_steps_trained: 1696000
    num_steps_sampled: 1696000
    num_steps_trained: 169600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1696,45238.6,1696000,-34.06,-22.8,-55.3,340.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1697000
  custom_metrics: {}
  date: 2021-10-29_09-40-59
  done: false
  episode_len_mean: 335.06
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -33.50600000000021
  episode_reward_min: -55.300000000000516
  episodes_this_iter: 3
  episodes_total: 5606
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42671201275528403
          cur_lr: 5.000000000000001e-05
          entropy: 0.9308066308498383
          entropy_coeff: 0.009999999999999998
          kl: 0.01095145450533429
          policy_loss: -0.10273124054074287
          total_loss: 1.5659395880169338
          vf_explained_var: 0.12843185663223267
          vf_loss: 1.6733057843314276
    num_agent_steps_sampled: 1697000
    num_agent_steps_trained: 1697000
    num_steps_sampled: 1697000
    num_steps_trained: 1697000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1697,45262,1697000,-33.506,-22.8,-55.3,335.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1698000
  custom_metrics: {}
  date: 2021-10-29_09-41-21
  done: false
  episode_len_mean: 328.93
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -32.8930000000002
  episode_reward_min: -52.00000000000047
  episodes_this_iter: 3
  episodes_total: 5609
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.42671201275528403
          cur_lr: 5.000000000000001e-05
          entropy: 1.0791124767727323
          entropy_coeff: 0.009999999999999998
          kl: 0.021740188032546588
          policy_loss: -0.10214109251068698
          total_loss: 1.449626800749037
          vf_explained_var: 0.18856962025165558
          vf_loss: 1.5532822291056314
    num_agent_steps_sampled: 1698000
    num_agent_steps_trained: 1698000
    num_steps_sampled: 1698000
    num_steps_trained: 1698000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1698,45284.7,1698000,-32.893,-22.8,-52,328.93




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1699000
  custom_metrics: {}
  date: 2021-10-29_09-42-10
  done: false
  episode_len_mean: 325.29
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -32.529000000000195
  episode_reward_min: -52.00000000000047
  episodes_this_iter: 4
  episodes_total: 5613
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6400680191329259
          cur_lr: 5.000000000000001e-05
          entropy: 0.7331187440289392
          entropy_coeff: 0.009999999999999998
          kl: 0.01025355594863863
          policy_loss: -0.08856716603040696
          total_loss: 1.5269155694378747
          vf_explained_var: 0.1433296501636505
          vf_loss: 1.6162509520848591
    num_agent_steps_sampled: 1699000
    num_agent_steps_trained: 1699000
    num_steps_sampled: 1699000
    num_steps_trained: 1699000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1699,45333.3,1699000,-32.529,-22.8,-52,325.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1700000
  custom_metrics: {}
  date: 2021-10-29_09-42-35
  done: false
  episode_len_mean: 318.82
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -31.88200000000018
  episode_reward_min: -52.00000000000047
  episodes_this_iter: 4
  episodes_total: 5617
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6400680191329259
          cur_lr: 5.000000000000001e-05
          entropy: 1.4441626813676622
          entropy_coeff: 0.009999999999999998
          kl: 0.008240027036067153
          policy_loss: 0.04076718332038985
          total_loss: 1.3154670510027144
          vf_explained_var: 0.3071775734424591
          vf_loss: 1.283867311477661
    num_agent_steps_sampled: 1700000
    num_agent_steps_trained: 1700000
    num_steps_sampled: 1700000
    num_steps_trained: 1700000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1700,45357.7,1700000,-31.882,-22.8,-52,318.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1701000
  custom_metrics: {}
  date: 2021-10-29_09-42-56
  done: false
  episode_len_mean: 319.33
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -31.933000000000188
  episode_reward_min: -52.00000000000047
  episodes_this_iter: 2
  episodes_total: 5619
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6400680191329259
          cur_lr: 5.000000000000001e-05
          entropy: 1.5586668107244703
          entropy_coeff: 0.009999999999999998
          kl: 0.01301486157171049
          policy_loss: -0.06446697314580281
          total_loss: 1.0118137068218656
          vf_explained_var: -0.2067219465970993
          vf_loss: 1.0835369461112552
    num_agent_steps_sampled: 1701000
    num_agent_steps_trained: 1701000
    num_steps_sampled: 1701000
    num_steps_trained: 1701000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1701,45379.5,1701000,-31.933,-22.8,-52,319.33


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1702000
  custom_metrics: {}
  date: 2021-10-29_09-43-20
  done: false
  episode_len_mean: 318.22
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -31.82200000000018
  episode_reward_min: -52.00000000000047
  episodes_this_iter: 3
  episodes_total: 5622
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6400680191329259
          cur_lr: 5.000000000000001e-05
          entropy: 1.0285658286677466
          entropy_coeff: 0.009999999999999998
          kl: 0.017928405706571324
          policy_loss: -0.12688915812306933
          total_loss: 1.441931853029463
          vf_explained_var: 0.15813179314136505
          vf_loss: 1.5676312618785435
    num_agent_steps_sampled: 1702000
    num_agent_steps_trained: 1702000
    num_steps_sampled: 1702000
    num_steps_trained: 1702000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1702,45403,1702000,-31.822,-22.8,-52,318.22


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1703000
  custom_metrics: {}
  date: 2021-10-29_09-43-44
  done: false
  episode_len_mean: 319.9
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -31.990000000000183
  episode_reward_min: -52.00000000000047
  episodes_this_iter: 3
  episodes_total: 5625
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6400680191329259
          cur_lr: 5.000000000000001e-05
          entropy: 1.6362894588046604
          entropy_coeff: 0.009999999999999998
          kl: 0.01432392293377881
          policy_loss: -0.07730358673466577
          total_loss: 0.9928640358977847
          vf_explained_var: 0.20851430296897888
          vf_loss: 1.0773622161812253
    num_agent_steps_sampled: 1703000
    num_agent_steps_trained: 1703000
    num_steps_sampled: 1703000
    num_steps_trained: 1703000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1703,45426.8,1703000,-31.99,-22.8,-52,319.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1704000
  custom_metrics: {}
  date: 2021-10-29_09-44-10
  done: false
  episode_len_mean: 314.28
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -31.428000000000175
  episode_reward_min: -52.00000000000047
  episodes_this_iter: 4
  episodes_total: 5629
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6400680191329259
          cur_lr: 5.000000000000001e-05
          entropy: 0.7690743260913425
          entropy_coeff: 0.009999999999999998
          kl: 0.0095582198586411
          policy_loss: -0.03629486486315727
          total_loss: 1.015989550617006
          vf_explained_var: 0.30449408292770386
          vf_loss: 1.0538572344515058
    num_agent_steps_sampled: 1704000
    num_agent_steps_trained: 1704000
    num_steps_sampled: 1704000
    num_steps_trained: 1704000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1704,45453.5,1704000,-31.428,-22.8,-52,314.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1705000
  custom_metrics: {}
  date: 2021-10-29_09-44-38
  done: false
  episode_len_mean: 310.97
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -31.097000000000175
  episode_reward_min: -52.00000000000047
  episodes_this_iter: 4
  episodes_total: 5633
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6400680191329259
          cur_lr: 5.000000000000001e-05
          entropy: 1.5802058921919928
          entropy_coeff: 0.009999999999999998
          kl: 0.02599899261371582
          policy_loss: 0.030873375799920826
          total_loss: 0.8893558300203748
          vf_explained_var: 0.37859904766082764
          vf_loss: 0.8576433810922834
    num_agent_steps_sampled: 1705000
    num_agent_steps_trained: 1705000
    num_steps_sampled: 1705000
    num_steps_trained: 1705000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1705,45480.9,1705000,-31.097,-22.8,-52,310.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1706000
  custom_metrics: {}
  date: 2021-10-29_09-45-07
  done: false
  episode_len_mean: 307.73
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -30.77300000000017
  episode_reward_min: -52.00000000000047
  episodes_this_iter: 3
  episodes_total: 5636
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.960102028699389
          cur_lr: 5.000000000000001e-05
          entropy: 0.8918811413976881
          entropy_coeff: 0.009999999999999998
          kl: 0.003907594890799417
          policy_loss: -0.08984772546423805
          total_loss: 0.8681350655025906
          vf_explained_var: 0.21432821452617645
          vf_loss: 0.9631499263975355
    num_agent_steps_sampled: 1706000
    num_agent_steps_trained: 1706000
    num_steps_sampled: 1706000
    num_steps_trained: 1706000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1706,45510.4,1706000,-30.773,-22.8,-52,307.73


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1707000
  custom_metrics: {}
  date: 2021-10-29_09-45-30
  done: false
  episode_len_mean: 307.47
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -30.747000000000167
  episode_reward_min: -52.00000000000047
  episodes_this_iter: 3
  episodes_total: 5639
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4800510143496945
          cur_lr: 5.000000000000001e-05
          entropy: 1.5239773015181224
          entropy_coeff: 0.009999999999999998
          kl: 0.02119270863474709
          policy_loss: -0.0922468892402119
          total_loss: 0.6342052565680609
          vf_explained_var: 0.3109357953071594
          vf_loss: 0.731518335474862
    num_agent_steps_sampled: 1707000
    num_agent_steps_trained: 1707000
    num_steps_sampled: 1707000
    num_steps_trained: 1707000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1707,45533.1,1707000,-30.747,-22.8,-52,307.47




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1708000
  custom_metrics: {}
  date: 2021-10-29_09-46-20
  done: false
  episode_len_mean: 300.76
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -30.076000000000153
  episode_reward_min: -52.00000000000047
  episodes_this_iter: 4
  episodes_total: 5643
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7200765215245416
          cur_lr: 5.000000000000001e-05
          entropy: 0.7836055881447263
          entropy_coeff: 0.009999999999999998
          kl: 0.008414137807882425
          policy_loss: -0.1431813088970052
          total_loss: 0.8367723120583428
          vf_explained_var: 0.49900022149086
          vf_loss: 0.9817308571603564
    num_agent_steps_sampled: 1708000
    num_agent_steps_trained: 1708000
    num_steps_sampled: 1708000
    num_steps_trained: 1708000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1708,45583,1708000,-30.076,-22.8,-52,300.76


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1709000
  custom_metrics: {}
  date: 2021-10-29_09-46-48
  done: false
  episode_len_mean: 296.86
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.686000000000153
  episode_reward_min: -50.80000000000045
  episodes_this_iter: 4
  episodes_total: 5647
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7200765215245416
          cur_lr: 5.000000000000001e-05
          entropy: 1.324669380320443
          entropy_coeff: 0.009999999999999998
          kl: 0.012796968184343843
          policy_loss: 0.04756161363588439
          total_loss: 0.577283202111721
          vf_explained_var: 0.2726372182369232
          vf_loss: 0.533753486474355
    num_agent_steps_sampled: 1709000
    num_agent_steps_trained: 1709000
    num_steps_sampled: 1709000
    num_steps_trained: 1709000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1709,45610.7,1709000,-29.686,-22.8,-50.8,296.86


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1710000
  custom_metrics: {}
  date: 2021-10-29_09-47-17
  done: false
  episode_len_mean: 293.1
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.310000000000144
  episode_reward_min: -45.300000000000374
  episodes_this_iter: 4
  episodes_total: 5651
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7200765215245416
          cur_lr: 5.000000000000001e-05
          entropy: 0.8798009918795692
          entropy_coeff: 0.009999999999999998
          kl: 0.009411406103180298
          policy_loss: 0.06923572048544883
          total_loss: 0.8335489153861999
          vf_explained_var: 0.20725053548812866
          vf_loss: 0.7663342833518982
    num_agent_steps_sampled: 1710000
    num_agent_steps_trained: 1710000
    num_steps_sampled: 1710000
    num_steps_trained: 1710000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1710,45639.9,1710000,-29.31,-22.8,-45.3,293.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1711000
  custom_metrics: {}
  date: 2021-10-29_09-47-44
  done: false
  episode_len_mean: 291.96
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.19600000000014
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 3
  episodes_total: 5654
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7200765215245416
          cur_lr: 5.000000000000001e-05
          entropy: 1.3933887872431012
          entropy_coeff: 0.009999999999999998
          kl: 0.012262480589893383
          policy_loss: -0.09400145262479782
          total_loss: 0.6720680327879058
          vf_explained_var: 0.2451552450656891
          vf_loss: 0.7711734427346124
    num_agent_steps_sampled: 1711000
    num_agent_steps_trained: 1711000
    num_steps_sampled: 1711000
    num_steps_trained: 1711000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1711,45667,1711000,-29.196,-22.8,-45.1,291.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1712000
  custom_metrics: {}
  date: 2021-10-29_09-48-08
  done: false
  episode_len_mean: 291.34
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.134000000000142
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 4
  episodes_total: 5658
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7200765215245416
          cur_lr: 5.000000000000001e-05
          entropy: 1.5636634548505148
          entropy_coeff: 0.009999999999999998
          kl: 0.008750982648278378
          policy_loss: 0.06131915648778279
          total_loss: 0.7970020655128691
          vf_explained_var: 0.251461923122406
          vf_loss: 0.7450181676281823
    num_agent_steps_sampled: 1712000
    num_agent_steps_trained: 1712000
    num_steps_sampled: 1712000
    num_steps_trained: 1712000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1712,45691.3,1712000,-29.134,-22.8,-45.1,291.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1713000
  custom_metrics: {}
  date: 2021-10-29_09-48-35
  done: false
  episode_len_mean: 291.99
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -29.199000000000147
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 3
  episodes_total: 5661
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7200765215245416
          cur_lr: 5.000000000000001e-05
          entropy: 1.5942921333842808
          entropy_coeff: 0.009999999999999998
          kl: 0.016450206402541787
          policy_loss: 0.004432819369766448
          total_loss: 0.6993018239736557
          vf_explained_var: 0.37295395135879517
          vf_loss: 0.6989665290133821
    num_agent_steps_sampled: 1713000
    num_agent_steps_trained: 1713000
    num_steps_sampled: 1713000
    num_steps_trained: 1713000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1713,45717.6,1713000,-29.199,-22.8,-45.1,291.99


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1714000
  custom_metrics: {}
  date: 2021-10-29_09-49-03
  done: false
  episode_len_mean: 289.8
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.980000000000146
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 4
  episodes_total: 5665
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7200765215245416
          cur_lr: 5.000000000000001e-05
          entropy: 0.934126098950704
          entropy_coeff: 0.009999999999999998
          kl: 0.012333138290593838
          policy_loss: -0.012035913351509307
          total_loss: 1.3254371126492819
          vf_explained_var: 0.13558313250541687
          vf_loss: 1.3379335072305467
    num_agent_steps_sampled: 1714000
    num_agent_steps_trained: 1714000
    num_steps_sampled: 1714000
    num_steps_trained: 1714000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1714,45746.1,1714000,-28.98,-22.8,-45.1,289.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1715000
  custom_metrics: {}
  date: 2021-10-29_09-49-30
  done: false
  episode_len_mean: 289.31
  episode_media: {}
  episode_reward_max: -22.800000000000054
  episode_reward_mean: -28.931000000000143
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 3
  episodes_total: 5668
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7200765215245416
          cur_lr: 5.000000000000001e-05
          entropy: 1.7378935780790117
          entropy_coeff: 0.009999999999999998
          kl: 0.01162960359107057
          policy_loss: -0.005567943387561374
          total_loss: 0.9991592218478521
          vf_explained_var: 0.05852625519037247
          vf_loss: 1.013731893317567
    num_agent_steps_sampled: 1715000
    num_agent_steps_trained: 1715000
    num_steps_sampled: 1715000
    num_steps_trained: 1715000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1715,45772.5,1715000,-28.931,-22.8,-45.1,289.31




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1716000
  custom_metrics: {}
  date: 2021-10-29_09-50-14
  done: false
  episode_len_mean: 289.42
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.942000000000142
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 4
  episodes_total: 5672
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7200765215245416
          cur_lr: 5.000000000000001e-05
          entropy: 1.1913432766993841
          entropy_coeff: 0.009999999999999998
          kl: 0.07354958289485865
          policy_loss: 0.04480684047771825
          total_loss: 1.4349492463800642
          vf_explained_var: 0.3863371014595032
          vf_loss: 1.349094520509243
    num_agent_steps_sampled: 1716000
    num_agent_steps_trained: 1716000
    num_steps_sampled: 1716000
    num_steps_trained: 1716000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1716,45816.9,1716000,-28.942,-21.5,-45.1,289.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1717000
  custom_metrics: {}
  date: 2021-10-29_09-50-43
  done: false
  episode_len_mean: 286.58
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.658000000000133
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 4
  episodes_total: 5676
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.080114782286813
          cur_lr: 5.000000000000001e-05
          entropy: 1.0608204159471724
          entropy_coeff: 0.009999999999999998
          kl: 0.013509676775285308
          policy_loss: -0.012168283512194952
          total_loss: 0.7307965609762404
          vf_explained_var: 0.5848702192306519
          vf_loss: 0.7389810515774621
    num_agent_steps_sampled: 1717000
    num_agent_steps_trained: 1717000
    num_steps_sampled: 1717000
    num_steps_trained: 1717000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1717,45845.6,1717000,-28.658,-21.5,-45.1,286.58


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1718000
  custom_metrics: {}
  date: 2021-10-29_09-51-08
  done: false
  episode_len_mean: 287.04
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.704000000000136
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 3
  episodes_total: 5679
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.080114782286813
          cur_lr: 5.000000000000001e-05
          entropy: 1.614759585592482
          entropy_coeff: 0.009999999999999998
          kl: 0.014193718094299217
          policy_loss: 0.0297573650876681
          total_loss: 0.6133345603942871
          vf_explained_var: 0.16659517586231232
          vf_loss: 0.5843939517107275
    num_agent_steps_sampled: 1718000
    num_agent_steps_trained: 1718000
    num_steps_sampled: 1718000
    num_steps_trained: 1718000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1718,45870.9,1718000,-28.704,-21.5,-45.1,287.04


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1719000
  custom_metrics: {}
  date: 2021-10-29_09-51-37
  done: false
  episode_len_mean: 287.19
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.719000000000136
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 4
  episodes_total: 5683
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.080114782286813
          cur_lr: 5.000000000000001e-05
          entropy: 1.2684441248575846
          entropy_coeff: 0.009999999999999998
          kl: 0.006729866740389509
          policy_loss: -0.021359695659743414
          total_loss: 1.1122117598851522
          vf_explained_var: 0.2262738049030304
          vf_loss: 1.1389868742889828
    num_agent_steps_sampled: 1719000
    num_agent_steps_trained: 1719000
    num_steps_sampled: 1719000
    num_steps_trained: 1719000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1719,45899.8,1719000,-28.719,-21.5,-45.1,287.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1720000
  custom_metrics: {}
  date: 2021-10-29_09-52-04
  done: false
  episode_len_mean: 288.13
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.813000000000137
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 3
  episodes_total: 5686
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.080114782286813
          cur_lr: 5.000000000000001e-05
          entropy: 1.8366437686814203
          entropy_coeff: 0.009999999999999998
          kl: 0.009674454516057867
          policy_loss: -0.04330543751517932
          total_loss: 0.7376040488481521
          vf_explained_var: 0.27833905816078186
          vf_loss: 0.7888263973924848
    num_agent_steps_sampled: 1720000
    num_agent_steps_trained: 1720000
    num_steps_sampled: 1720000
    num_steps_trained: 1720000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1720,45926.7,1720000,-28.813,-21.5,-45.1,288.13


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1721000
  custom_metrics: {}
  date: 2021-10-29_09-52-32
  done: false
  episode_len_mean: 287.34
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.73400000000014
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 4
  episodes_total: 5690
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.080114782286813
          cur_lr: 5.000000000000001e-05
          entropy: 1.2857196900579664
          entropy_coeff: 0.009999999999999998
          kl: 0.007914113165866847
          policy_loss: 0.09103854215807385
          total_loss: 0.7687301983435949
          vf_explained_var: 0.3422664999961853
          vf_loss: 0.6820007062620587
    num_agent_steps_sampled: 1721000
    num_agent_steps_trained: 1721000
    num_steps_sampled: 1721000
    num_steps_trained: 1721000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1721,45954.7,1721000,-28.734,-21.5,-45.1,287.34


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1722000
  custom_metrics: {}
  date: 2021-10-29_09-52-58
  done: false
  episode_len_mean: 287.82
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.782000000000142
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 4
  episodes_total: 5694
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.080114782286813
          cur_lr: 5.000000000000001e-05
          entropy: 1.781746702724033
          entropy_coeff: 0.009999999999999998
          kl: 0.007673646122072321
          policy_loss: 0.012840319259299172
          total_loss: 0.9729415529304081
          vf_explained_var: 0.23583215475082397
          vf_loss: 0.9696302864286634
    num_agent_steps_sampled: 1722000
    num_agent_steps_trained: 1722000
    num_steps_sampled: 1722000
    num_steps_trained: 1722000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1722,45980.6,1722000,-28.782,-21.5,-45.1,287.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1723000
  custom_metrics: {}
  date: 2021-10-29_09-53-28
  done: false
  episode_len_mean: 286.63
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.663000000000142
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 4
  episodes_total: 5698
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.080114782286813
          cur_lr: 5.000000000000001e-05
          entropy: 0.903241636355718
          entropy_coeff: 0.009999999999999998
          kl: 0.0044359898078435355
          policy_loss: 0.019053092847267788
          total_loss: 1.0278514007727304
          vf_explained_var: 0.345626562833786
          vf_loss: 1.013039359781477
    num_agent_steps_sampled: 1723000
    num_agent_steps_trained: 1723000
    num_steps_sampled: 1723000
    num_steps_trained: 1723000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1723,46010.4,1723000,-28.663,-21.5,-45.1,286.63




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1724000
  custom_metrics: {}
  date: 2021-10-29_09-54-13
  done: false
  episode_len_mean: 284.61
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.46100000000014
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 3
  episodes_total: 5701
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5400573911434065
          cur_lr: 5.000000000000001e-05
          entropy: 1.314332636859682
          entropy_coeff: 0.009999999999999998
          kl: 0.0134328009688973
          policy_loss: 0.04339626000987159
          total_loss: 0.9671124849054549
          vf_explained_var: 0.6453262567520142
          vf_loss: 0.929605073067877
    num_agent_steps_sampled: 1724000
    num_agent_steps_trained: 1724000
    num_steps_sampled: 1724000
    num_steps_trained: 1724000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1724,46055.9,1724000,-28.461,-21.5,-45.1,284.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1725000
  custom_metrics: {}
  date: 2021-10-29_09-54-39
  done: false
  episode_len_mean: 285.81
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.58100000000014
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 4
  episodes_total: 5705
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5400573911434065
          cur_lr: 5.000000000000001e-05
          entropy: 1.3284127758608923
          entropy_coeff: 0.009999999999999998
          kl: 0.03381076693034773
          policy_loss: -0.02688291221857071
          total_loss: 1.0477752685546875
          vf_explained_var: 0.45253652334213257
          vf_loss: 1.0696825481123395
    num_agent_steps_sampled: 1725000
    num_agent_steps_trained: 1725000
    num_steps_sampled: 1725000
    num_steps_trained: 1725000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1725,46081.9,1725000,-28.581,-21.5,-45.1,285.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1726000
  custom_metrics: {}
  date: 2021-10-29_09-55-05
  done: false
  episode_len_mean: 284.12
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.412000000000134
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 3
  episodes_total: 5708
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8100860867151095
          cur_lr: 5.000000000000001e-05
          entropy: 1.1954922318458556
          entropy_coeff: 0.009999999999999998
          kl: 0.025078413807580604
          policy_loss: -0.04874794963333342
          total_loss: 0.6282111687792672
          vf_explained_var: 0.030455082654953003
          vf_loss: 0.6685983598232269
    num_agent_steps_sampled: 1726000
    num_agent_steps_trained: 1726000
    num_steps_sampled: 1726000
    num_steps_trained: 172600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1726,46108,1726000,-28.412,-21.5,-45.1,284.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1727000
  custom_metrics: {}
  date: 2021-10-29_09-55-33
  done: false
  episode_len_mean: 283.28
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.328000000000134
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 4
  episodes_total: 5712
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2151291300726648
          cur_lr: 5.000000000000001e-05
          entropy: 1.094776795970069
          entropy_coeff: 0.009999999999999998
          kl: 0.006410932805044607
          policy_loss: 0.018860608008172776
          total_loss: 1.1969331992997063
          vf_explained_var: 0.3948930501937866
          vf_loss: 1.1812302476829952
    num_agent_steps_sampled: 1727000
    num_agent_steps_trained: 1727000
    num_steps_sampled: 1727000
    num_steps_trained: 1727000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1727,46135.9,1727000,-28.328,-21.5,-45.1,283.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1728000
  custom_metrics: {}
  date: 2021-10-29_09-55-59
  done: false
  episode_len_mean: 284.59
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.45900000000013
  episode_reward_min: -45.10000000000037
  episodes_this_iter: 3
  episodes_total: 5715
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2151291300726648
          cur_lr: 5.000000000000001e-05
          entropy: 1.0934888707266914
          entropy_coeff: 0.009999999999999998
          kl: 0.005719571510592421
          policy_loss: -0.04619786370959547
          total_loss: 1.0036790914005704
          vf_explained_var: 0.481564998626709
          vf_loss: 1.053861833943261
    num_agent_steps_sampled: 1728000
    num_agent_steps_trained: 1728000
    num_steps_sampled: 1728000
    num_steps_trained: 1728000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1728,46161.3,1728000,-28.459,-21.5,-45.1,284.59


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1729000
  custom_metrics: {}
  date: 2021-10-29_09-56-27
  done: false
  episode_len_mean: 280.84
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.08400000000013
  episode_reward_min: -43.900000000000354
  episodes_this_iter: 4
  episodes_total: 5719
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2151291300726648
          cur_lr: 5.000000000000001e-05
          entropy: 1.04953277375963
          entropy_coeff: 0.009999999999999998
          kl: 0.005385139986488162
          policy_loss: 0.026625231487883462
          total_loss: 1.1912267274326749
          vf_explained_var: 0.48913493752479553
          vf_loss: 1.1685531735420227
    num_agent_steps_sampled: 1729000
    num_agent_steps_trained: 1729000
    num_steps_sampled: 1729000
    num_steps_trained: 1729000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1729,46189.2,1729000,-28.084,-21.5,-43.9,280.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1730000
  custom_metrics: {}
  date: 2021-10-29_09-56-58
  done: false
  episode_len_mean: 277.7
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.770000000000127
  episode_reward_min: -43.900000000000354
  episodes_this_iter: 4
  episodes_total: 5723
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2151291300726648
          cur_lr: 5.000000000000001e-05
          entropy: 0.8202168650097317
          entropy_coeff: 0.009999999999999998
          kl: 0.007903974522893896
          policy_loss: 0.004484263931711515
          total_loss: 0.8246970719761318
          vf_explained_var: 0.6830153465270996
          vf_loss: 0.8188106305069394
    num_agent_steps_sampled: 1730000
    num_agent_steps_trained: 1730000
    num_steps_sampled: 1730000
    num_steps_trained: 1730000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1730,46220.4,1730000,-27.77,-21.5,-43.9,277.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1731000
  custom_metrics: {}
  date: 2021-10-29_09-57-25
  done: false
  episode_len_mean: 276.13
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.61300000000012
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 5727
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2151291300726648
          cur_lr: 5.000000000000001e-05
          entropy: 0.8151654594474369
          entropy_coeff: 0.009999999999999998
          kl: 0.0045034001676225125
          policy_loss: 0.018357067182660104
          total_loss: 0.9709693643781874
          vf_explained_var: 0.5277616381645203
          vf_loss: 0.9552917308277554
    num_agent_steps_sampled: 1731000
    num_agent_steps_trained: 1731000
    num_steps_sampled: 1731000
    num_steps_trained: 1731000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1731,46247.9,1731000,-27.613,-21.5,-40.7,276.13




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1732000
  custom_metrics: {}
  date: 2021-10-29_09-58-09
  done: false
  episode_len_mean: 276.12
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.612000000000126
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 5730
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6075645650363324
          cur_lr: 5.000000000000001e-05
          entropy: 0.9355302684836917
          entropy_coeff: 0.009999999999999998
          kl: 0.007456200467179199
          policy_loss: -0.01882386306921641
          total_loss: 0.6463611122634676
          vf_explained_var: 0.6448656916618347
          vf_loss: 0.6700101592474513
    num_agent_steps_sampled: 1732000
    num_agent_steps_trained: 1732000
    num_steps_sampled: 1732000
    num_steps_trained: 1732000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1732,46291.5,1732000,-27.612,-21.5,-40.7,276.12


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1733000
  custom_metrics: {}
  date: 2021-10-29_09-58-36
  done: false
  episode_len_mean: 276.57
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.657000000000124
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 4
  episodes_total: 5734
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6075645650363324
          cur_lr: 5.000000000000001e-05
          entropy: 2.00701554218928
          entropy_coeff: 0.009999999999999998
          kl: 0.021005775865032206
          policy_loss: -0.06074002948072221
          total_loss: 0.9388204005029467
          vf_explained_var: 0.4352911114692688
          vf_loss: 1.0068682180510626
    num_agent_steps_sampled: 1733000
    num_agent_steps_trained: 1733000
    num_steps_sampled: 1733000
    num_steps_trained: 1733000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1733,46318.5,1733000,-27.657,-21.5,-40.7,276.57


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1734000
  custom_metrics: {}
  date: 2021-10-29_09-59-01
  done: false
  episode_len_mean: 277.15
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.715000000000124
  episode_reward_min: -40.70000000000031
  episodes_this_iter: 3
  episodes_total: 5737
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9113468475544981
          cur_lr: 5.000000000000001e-05
          entropy: 1.4435333145989313
          entropy_coeff: 0.009999999999999998
          kl: 0.008485031863896906
          policy_loss: -0.09720487048228582
          total_loss: 0.9334081484211816
          vf_explained_var: 0.4444098174571991
          vf_loss: 1.0373155471351412
    num_agent_steps_sampled: 1734000
    num_agent_steps_trained: 1734000
    num_steps_sampled: 1734000
    num_steps_trained: 1734000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1734,46343.8,1734000,-27.715,-21.5,-40.7,277.15


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1735000
  custom_metrics: {}
  date: 2021-10-29_09-59-25
  done: false
  episode_len_mean: 277.77
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.777000000000122
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 3
  episodes_total: 5740
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9113468475544981
          cur_lr: 5.000000000000001e-05
          entropy: 1.586328191889657
          entropy_coeff: 0.009999999999999998
          kl: 0.02322728153281525
          policy_loss: -0.023537739449077184
          total_loss: 0.7583351959784825
          vf_explained_var: -0.024730056524276733
          vf_loss: 0.7765681022571193
    num_agent_steps_sampled: 1735000
    num_agent_steps_trained: 1735000
    num_steps_sampled: 1735000
    num_steps_trained: 173500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1735,46367.4,1735000,-27.777,-21.5,-44.5,277.77


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1736000
  custom_metrics: {}
  date: 2021-10-29_09-59-49
  done: false
  episode_len_mean: 279.47
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -27.947000000000127
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 4
  episodes_total: 5744
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3670202713317474
          cur_lr: 5.000000000000001e-05
          entropy: 1.1472966432571412
          entropy_coeff: 0.009999999999999998
          kl: 0.008868021889254408
          policy_loss: -0.20815311438507503
          total_loss: 0.943012245827251
          vf_explained_var: 0.6078745126724243
          vf_loss: 1.1505155593984657
    num_agent_steps_sampled: 1736000
    num_agent_steps_trained: 1736000
    num_steps_sampled: 1736000
    num_steps_trained: 1736000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1736,46391.6,1736000,-27.947,-21.5,-44.5,279.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1737000
  custom_metrics: {}
  date: 2021-10-29_10-00-14
  done: false
  episode_len_mean: 280.49
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.049000000000124
  episode_reward_min: -44.50000000000036
  episodes_this_iter: 3
  episodes_total: 5747
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3670202713317474
          cur_lr: 5.000000000000001e-05
          entropy: 1.5290320482518938
          entropy_coeff: 0.009999999999999998
          kl: 0.017790779672239212
          policy_loss: 0.0026991965042220223
          total_loss: 0.5665823413266076
          vf_explained_var: 0.6646060347557068
          vf_loss: 0.5548531085252761
    num_agent_steps_sampled: 1737000
    num_agent_steps_trained: 1737000
    num_steps_sampled: 1737000
    num_steps_trained: 1737000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1737,46416.2,1737000,-28.049,-21.5,-44.5,280.49


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1738000
  custom_metrics: {}
  date: 2021-10-29_10-00-36
  done: false
  episode_len_mean: 283.52
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.35200000000013
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 3
  episodes_total: 5750
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3670202713317474
          cur_lr: 5.000000000000001e-05
          entropy: 1.4465340157349904
          entropy_coeff: 0.009999999999999998
          kl: 0.012667912113738883
          policy_loss: 0.18590631315277684
          total_loss: 0.6260865247497956
          vf_explained_var: 0.32922470569610596
          vf_loss: 0.4373282579187718
    num_agent_steps_sampled: 1738000
    num_agent_steps_trained: 1738000
    num_steps_sampled: 1738000
    num_steps_trained: 1738000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1738,46438.3,1738000,-28.352,-21.5,-50.1,283.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1739000
  custom_metrics: {}
  date: 2021-10-29_10-01-00
  done: false
  episode_len_mean: 285.47
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.547000000000136
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 3
  episodes_total: 5753
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3670202713317474
          cur_lr: 5.000000000000001e-05
          entropy: 1.4865010446972318
          entropy_coeff: 0.009999999999999998
          kl: 0.009219023838255088
          policy_loss: 0.03612520860301124
          total_loss: 0.7276535229550467
          vf_explained_var: 0.4347371459007263
          vf_loss: 0.6937907293438912
    num_agent_steps_sampled: 1739000
    num_agent_steps_trained: 1739000
    num_steps_sampled: 1739000
    num_steps_trained: 1739000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1739,46462.6,1739000,-28.547,-21.5,-50.1,285.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1740000
  custom_metrics: {}
  date: 2021-10-29_10-01-26
  done: false
  episode_len_mean: 285.6
  episode_media: {}
  episode_reward_max: -21.500000000000036
  episode_reward_mean: -28.56000000000013
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 3
  episodes_total: 5756
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3670202713317474
          cur_lr: 5.000000000000001e-05
          entropy: 1.5954764895968967
          entropy_coeff: 0.009999999999999998
          kl: 0.005638934703250895
          policy_loss: 0.00953958746459749
          total_loss: 0.6290567169586817
          vf_explained_var: 0.3523971736431122
          vf_loss: 0.6277633657885923
    num_agent_steps_sampled: 1740000
    num_agent_steps_trained: 1740000
    num_steps_sampled: 1740000
    num_steps_trained: 1740000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1740,46488.5,1740000,-28.56,-21.5,-50.1,285.6




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1741000
  custom_metrics: {}
  date: 2021-10-29_10-02-10
  done: false
  episode_len_mean: 283.75
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -28.375000000000128
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5760
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3670202713317474
          cur_lr: 5.000000000000001e-05
          entropy: 1.2927636209461424
          entropy_coeff: 0.009999999999999998
          kl: 0.033713278575816234
          policy_loss: 0.11804534966746966
          total_loss: 1.1740946491559348
          vf_explained_var: 0.3270624279975891
          vf_loss: 1.022890199555291
    num_agent_steps_sampled: 1741000
    num_agent_steps_trained: 1741000
    num_steps_sampled: 1741000
    num_steps_trained: 1741000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1741,46532,1741000,-28.375,-20.1,-50.1,283.75


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1742000
  custom_metrics: {}
  date: 2021-10-29_10-02-36
  done: false
  episode_len_mean: 286.18
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -28.618000000000137
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 3
  episodes_total: 5763
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0505304069976207
          cur_lr: 5.000000000000001e-05
          entropy: 1.6513166281912062
          entropy_coeff: 0.009999999999999998
          kl: 0.00579180128205776
          policy_loss: -0.0644854903842012
          total_loss: 1.063016643292374
          vf_explained_var: -0.2262054830789566
          vf_loss: 1.1321390245523717
    num_agent_steps_sampled: 1742000
    num_agent_steps_trained: 1742000
    num_steps_sampled: 1742000
    num_steps_trained: 1742000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1742,46557.8,1742000,-28.618,-20.1,-50.1,286.18


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1743000
  custom_metrics: {}
  date: 2021-10-29_10-03-02
  done: false
  episode_len_mean: 285.32
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -28.532000000000135
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 3
  episodes_total: 5766
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0505304069976207
          cur_lr: 5.000000000000001e-05
          entropy: 1.1041754182842043
          entropy_coeff: 0.009999999999999998
          kl: 0.004964309058098119
          policy_loss: -0.004219148639175627
          total_loss: 0.9510809623532825
          vf_explained_var: 0.5190646052360535
          vf_loss: 0.9561624032755692
    num_agent_steps_sampled: 1743000
    num_agent_steps_trained: 1743000
    num_steps_sampled: 1743000
    num_steps_trained: 1743000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1743,46584.2,1743000,-28.532,-20.1,-50.1,285.32


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1744000
  custom_metrics: {}
  date: 2021-10-29_10-03-27
  done: false
  episode_len_mean: 287.48
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -28.74800000000014
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 3
  episodes_total: 5769
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0252652034988103
          cur_lr: 5.000000000000001e-05
          entropy: 1.9628577325079175
          entropy_coeff: 0.009999999999999998
          kl: 0.009267595604731267
          policy_loss: -0.07366491887304517
          total_loss: 0.7281610134575102
          vf_explained_var: 0.4586915373802185
          vf_loss: 0.8119527619952956
    num_agent_steps_sampled: 1744000
    num_agent_steps_trained: 1744000
    num_steps_sampled: 1744000
    num_steps_trained: 1744000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1744,46608.8,1744000,-28.748,-20.1,-50.1,287.48


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1745000
  custom_metrics: {}
  date: 2021-10-29_10-03-51
  done: false
  episode_len_mean: 288.45
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -28.845000000000137
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5773
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0252652034988103
          cur_lr: 5.000000000000001e-05
          entropy: 1.8256554047266642
          entropy_coeff: 0.009999999999999998
          kl: 0.007429802954121673
          policy_loss: -0.010902309500508838
          total_loss: 0.74732373158137
          vf_explained_var: 0.44449958205223083
          vf_loss: 0.7688650753762987
    num_agent_steps_sampled: 1745000
    num_agent_steps_trained: 1745000
    num_steps_sampled: 1745000
    num_steps_trained: 1745000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1745,46633.5,1745000,-28.845,-20.1,-50.1,288.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1746000
  custom_metrics: {}
  date: 2021-10-29_10-04-14
  done: false
  episode_len_mean: 289.98
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -28.99800000000014
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 3
  episodes_total: 5776
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0252652034988103
          cur_lr: 5.000000000000001e-05
          entropy: 1.642898866203096
          entropy_coeff: 0.009999999999999998
          kl: 0.0055156600604358945
          policy_loss: 0.1869105454120371
          total_loss: 0.7627451946338017
          vf_explained_var: 0.6459958553314209
          vf_loss: 0.5866086181667116
    num_agent_steps_sampled: 1746000
    num_agent_steps_trained: 1746000
    num_steps_sampled: 1746000
    num_steps_trained: 1746000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1746,46656.2,1746000,-28.998,-20.1,-50.1,289.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1747000
  custom_metrics: {}
  date: 2021-10-29_10-04-36
  done: false
  episode_len_mean: 291.44
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.144000000000148
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 2
  episodes_total: 5778
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0252652034988103
          cur_lr: 5.000000000000001e-05
          entropy: 1.8628923720783657
          entropy_coeff: 0.009999999999999998
          kl: 0.010698870014858149
          policy_loss: -0.046558913836876555
          total_loss: 0.6963987320661544
          vf_explained_var: 0.5491335391998291
          vf_loss: 0.750617394141025
    num_agent_steps_sampled: 1747000
    num_agent_steps_trained: 1747000
    num_steps_sampled: 1747000
    num_steps_trained: 1747000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1747,46677.8,1747000,-29.144,-20.1,-50.1,291.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1748000
  custom_metrics: {}
  date: 2021-10-29_10-05-01
  done: false
  episode_len_mean: 292.59
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.259000000000142
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5782
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0252652034988103
          cur_lr: 5.000000000000001e-05
          entropy: 1.5467942449781629
          entropy_coeff: 0.009999999999999998
          kl: 0.012096873273030953
          policy_loss: 0.04254763308498594
          total_loss: 0.7662470115555657
          vf_explained_var: 0.5888993740081787
          vf_loss: 0.7267648117409812
    num_agent_steps_sampled: 1748000
    num_agent_steps_trained: 1748000
    num_steps_sampled: 1748000
    num_steps_trained: 1748000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1748,46703.3,1748000,-29.259,-20.1,-50.1,292.59


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1749000
  custom_metrics: {}
  date: 2021-10-29_10-05-26
  done: false
  episode_len_mean: 293.8
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.380000000000145
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 3
  episodes_total: 5785
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0252652034988103
          cur_lr: 5.000000000000001e-05
          entropy: 1.6359651101960075
          entropy_coeff: 0.009999999999999998
          kl: 0.005536927829899301
          policy_loss: 0.1175040726032522
          total_loss: 0.8365821926130189
          vf_explained_var: 0.18308469653129578
          vf_loss: 0.7297609517557754
    num_agent_steps_sampled: 1749000
    num_agent_steps_trained: 1749000
    num_steps_sampled: 1749000
    num_steps_trained: 1749000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1749,46727.7,1749000,-29.38,-20.1,-50.1,293.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1750000
  custom_metrics: {}
  date: 2021-10-29_10-05-54
  done: false
  episode_len_mean: 293.17
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.317000000000142
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5789
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0252652034988103
          cur_lr: 5.000000000000001e-05
          entropy: 1.0166600681013531
          entropy_coeff: 0.009999999999999998
          kl: 0.00787230136728962
          policy_loss: 0.04808682029445966
          total_loss: 0.8743802352084054
          vf_explained_var: 0.42923223972320557
          vf_loss: 0.8283888230721156
    num_agent_steps_sampled: 1750000
    num_agent_steps_trained: 1750000
    num_steps_sampled: 1750000
    num_steps_trained: 1750000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1750,46756.6,1750000,-29.317,-20.1,-50.1,293.17




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1751000
  custom_metrics: {}
  date: 2021-10-29_10-06-40
  done: false
  episode_len_mean: 293.3
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.33000000000015
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 3
  episodes_total: 5792
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0252652034988103
          cur_lr: 5.000000000000001e-05
          entropy: 1.5228329890304142
          entropy_coeff: 0.009999999999999998
          kl: 0.01068702083338585
          policy_loss: -0.07724562138319016
          total_loss: 0.6755082368850708
          vf_explained_var: 0.6350871324539185
          vf_loss: 0.7570251643657684
    num_agent_steps_sampled: 1751000
    num_agent_steps_trained: 1751000
    num_steps_sampled: 1751000
    num_steps_trained: 1751000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1751,46801.7,1751000,-29.33,-20.1,-50.1,293.3


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1752000
  custom_metrics: {}
  date: 2021-10-29_10-07-10
  done: false
  episode_len_mean: 293.45
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.34500000000015
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5796
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0252652034988103
          cur_lr: 5.000000000000001e-05
          entropy: 1.0530932426452637
          entropy_coeff: 0.009999999999999998
          kl: 0.005232692282083839
          policy_loss: -0.00041222050786018374
          total_loss: 0.7678463962343004
          vf_explained_var: 0.5153647661209106
          vf_loss: 0.7734246551162667
    num_agent_steps_sampled: 1752000
    num_agent_steps_trained: 1752000
    num_steps_sampled: 1752000
    num_steps_trained: 175200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1752,46831.9,1752000,-29.345,-20.1,-50.1,293.45


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1753000
  custom_metrics: {}
  date: 2021-10-29_10-07-38
  done: false
  episode_len_mean: 292.1
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.210000000000147
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5800
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0252652034988103
          cur_lr: 5.000000000000001e-05
          entropy: 0.9987575077348285
          entropy_coeff: 0.009999999999999998
          kl: 0.003500323627329038
          policy_loss: -0.07058885817726453
          total_loss: 0.9840491228633457
          vf_explained_var: 0.2817115783691406
          vf_loss: 1.0610368046495648
    num_agent_steps_sampled: 1753000
    num_agent_steps_trained: 1753000
    num_steps_sampled: 1753000
    num_steps_trained: 1753000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1753,46859.7,1753000,-29.21,-20.1,-50.1,292.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1754000
  custom_metrics: {}
  date: 2021-10-29_10-08-05
  done: false
  episode_len_mean: 293.02
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.302000000000138
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5804
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5126326017494052
          cur_lr: 5.000000000000001e-05
          entropy: 1.3935844580332437
          entropy_coeff: 0.009999999999999998
          kl: 0.013737574346656543
          policy_loss: 0.0049162214001019795
          total_loss: 0.8800759345293045
          vf_explained_var: 0.47701361775398254
          vf_loss: 0.8820532242457072
    num_agent_steps_sampled: 1754000
    num_agent_steps_trained: 1754000
    num_steps_sampled: 1754000
    num_steps_trained: 175400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1754,46887.1,1754000,-29.302,-20.1,-50.1,293.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1755000
  custom_metrics: {}
  date: 2021-10-29_10-08-32
  done: false
  episode_len_mean: 291.88
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.188000000000148
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 3
  episodes_total: 5807
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5126326017494052
          cur_lr: 5.000000000000001e-05
          entropy: 1.1392674095100828
          entropy_coeff: 0.009999999999999998
          kl: 0.004196755227259634
          policy_loss: -0.01511472248368793
          total_loss: 1.0148654010560778
          vf_explained_var: 0.3007787764072418
          vf_loss: 1.0392214046584236
    num_agent_steps_sampled: 1755000
    num_agent_steps_trained: 1755000
    num_steps_sampled: 1755000
    num_steps_trained: 1755000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1755,46914.4,1755000,-29.188,-20.1,-50.1,291.88


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1756000
  custom_metrics: {}
  date: 2021-10-29_10-08-58
  done: false
  episode_len_mean: 292.7
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.27000000000015
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5811
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2563163008747026
          cur_lr: 5.000000000000001e-05
          entropy: 1.0104753760827911
          entropy_coeff: 0.009999999999999998
          kl: 0.02616543078823036
          policy_loss: 0.014040093868970871
          total_loss: 0.7390707221296099
          vf_explained_var: 0.2627708613872528
          vf_loss: 0.7284287467909356
    num_agent_steps_sampled: 1756000
    num_agent_steps_trained: 1756000
    num_steps_sampled: 1756000
    num_steps_trained: 1756000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1756,46939.9,1756000,-29.27,-20.1,-50.1,292.7


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1757000
  custom_metrics: {}
  date: 2021-10-29_10-09-25
  done: false
  episode_len_mean: 293.23
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.323000000000153
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 3
  episodes_total: 5814
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.38447445131205393
          cur_lr: 5.000000000000001e-05
          entropy: 1.7266266769833034
          entropy_coeff: 0.009999999999999998
          kl: 0.024633985937145927
          policy_loss: -0.18722167677349513
          total_loss: 0.2600889126045836
          vf_explained_var: 0.7520186305046082
          vf_loss: 0.4551057214538256
    num_agent_steps_sampled: 1757000
    num_agent_steps_trained: 1757000
    num_steps_sampled: 1757000
    num_steps_trained: 1757000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1757,46967.2,1757000,-29.323,-20.1,-50.1,293.23


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1758000
  custom_metrics: {}
  date: 2021-10-29_10-09-54
  done: false
  episode_len_mean: 292.11
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.211000000000148
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5818
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.576711676968081
          cur_lr: 5.000000000000001e-05
          entropy: 1.216731705268224
          entropy_coeff: 0.009999999999999998
          kl: 0.008323837151546816
          policy_loss: -0.07700124084949493
          total_loss: 0.43379428337017695
          vf_explained_var: 0.6796525716781616
          vf_loss: 0.5181623809867435
    num_agent_steps_sampled: 1758000
    num_agent_steps_trained: 1758000
    num_steps_sampled: 1758000
    num_steps_trained: 1758000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1758,46996.4,1758000,-29.211,-20.1,-50.1,292.11




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1759000
  custom_metrics: {}
  date: 2021-10-29_10-10-40
  done: false
  episode_len_mean: 292.97
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.29700000000015
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5822
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.576711676968081
          cur_lr: 5.000000000000001e-05
          entropy: 1.2642842875586615
          entropy_coeff: 0.009999999999999998
          kl: 0.007673307196144365
          policy_loss: 0.049974190526538426
          total_loss: 0.5390539581576983
          vf_explained_var: 0.7946239709854126
          vf_loss: 0.4972973206804858
    num_agent_steps_sampled: 1759000
    num_agent_steps_trained: 1759000
    num_steps_sampled: 1759000
    num_steps_trained: 1759000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1759,47041.7,1759000,-29.297,-20.1,-50.1,292.97


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1760000
  custom_metrics: {}
  date: 2021-10-29_10-11-08
  done: false
  episode_len_mean: 293.19
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.319000000000145
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5826
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.576711676968081
          cur_lr: 5.000000000000001e-05
          entropy: 1.0963286214404635
          entropy_coeff: 0.009999999999999998
          kl: 0.042216417221608224
          policy_loss: 0.12593278346790207
          total_loss: 1.018268209695816
          vf_explained_var: 0.42894694209098816
          vf_loss: 0.8789520074923833
    num_agent_steps_sampled: 1760000
    num_agent_steps_trained: 1760000
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1760,47070.1,1760000,-29.319,-20.1,-50.1,293.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1761000
  custom_metrics: {}
  date: 2021-10-29_10-11-38
  done: false
  episode_len_mean: 291.42
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.142000000000145
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5830
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8650675154521216
          cur_lr: 5.000000000000001e-05
          entropy: 0.7449490659766727
          entropy_coeff: 0.009999999999999998
          kl: 0.0066602334897803905
          policy_loss: 0.05268658059131768
          total_loss: 1.1708268960316977
          vf_explained_var: 0.27341362833976746
          vf_loss: 1.1198282466994391
    num_agent_steps_sampled: 1761000
    num_agent_steps_trained: 1761000
    num_steps_sampled: 1761000
    num_steps_trained: 1761000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1761,47100.2,1761000,-29.142,-20.1,-50.1,291.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1762000
  custom_metrics: {}
  date: 2021-10-29_10-12-09
  done: false
  episode_len_mean: 290.02
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -29.002000000000145
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5834
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8650675154521216
          cur_lr: 5.000000000000001e-05
          entropy: 0.9834372636344698
          entropy_coeff: 0.009999999999999998
          kl: 0.009993394930751343
          policy_loss: -0.039058295140663786
          total_loss: 0.7899384942319658
          vf_explained_var: 0.4763247072696686
          vf_loss: 0.8301861908700731
    num_agent_steps_sampled: 1762000
    num_agent_steps_trained: 1762000
    num_steps_sampled: 1762000
    num_steps_trained: 1762000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1762,47130.5,1762000,-29.002,-20.1,-50.1,290.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1763000
  custom_metrics: {}
  date: 2021-10-29_10-12-38
  done: false
  episode_len_mean: 288.06
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -28.806000000000143
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5838
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8650675154521216
          cur_lr: 5.000000000000001e-05
          entropy: 1.3386399646600087
          entropy_coeff: 0.009999999999999998
          kl: 0.007173415752444375
          policy_loss: -0.023750105914142398
          total_loss: 0.6149034354421827
          vf_explained_var: 0.6541423201560974
          vf_loss: 0.645834460357825
    num_agent_steps_sampled: 1763000
    num_agent_steps_trained: 1763000
    num_steps_sampled: 1763000
    num_steps_trained: 1763000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1763,47159.4,1763000,-28.806,-20.1,-50.1,288.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1764000
  custom_metrics: {}
  date: 2021-10-29_10-13-07
  done: false
  episode_len_mean: 285.44
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -28.544000000000132
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5842
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8650675154521216
          cur_lr: 5.000000000000001e-05
          entropy: 1.1547812309530046
          entropy_coeff: 0.009999999999999998
          kl: 0.008859703871763793
          policy_loss: -0.0770788174536493
          total_loss: 0.5090982852710618
          vf_explained_var: 0.7408952713012695
          vf_loss: 0.590060671667258
    num_agent_steps_sampled: 1764000
    num_agent_steps_trained: 1764000
    num_steps_sampled: 1764000
    num_steps_trained: 1764000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1764,47188.4,1764000,-28.544,-20.1,-50.1,285.44


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1765000
  custom_metrics: {}
  date: 2021-10-29_10-13-35
  done: false
  episode_len_mean: 284.02
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -28.402000000000136
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 4
  episodes_total: 5846
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8650675154521216
          cur_lr: 5.000000000000001e-05
          entropy: 1.1388526919815276
          entropy_coeff: 0.009999999999999998
          kl: 0.017789474241223187
          policy_loss: -0.0020347171359592015
          total_loss: 0.8659931540489196
          vf_explained_var: 0.6633812189102173
          vf_loss: 0.8640272984902064
    num_agent_steps_sampled: 1765000
    num_agent_steps_trained: 1765000
    num_steps_sampled: 1765000
    num_steps_trained: 176500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1765,47216.5,1765000,-28.402,-20.1,-50.1,284.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1766000
  custom_metrics: {}
  date: 2021-10-29_10-14-00
  done: false
  episode_len_mean: 283.89
  episode_media: {}
  episode_reward_max: -20.100000000000016
  episode_reward_mean: -28.389000000000134
  episode_reward_min: -50.10000000000044
  episodes_this_iter: 3
  episodes_total: 5849
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8650675154521216
          cur_lr: 5.000000000000001e-05
          entropy: 1.6200510289933947
          entropy_coeff: 0.009999999999999998
          kl: 0.01091711522282398
          policy_loss: 0.08544951155781746
          total_loss: 0.6538438793685701
          vf_explained_var: 0.3190654218196869
          vf_loss: 0.5751508386598693
    num_agent_steps_sampled: 1766000
    num_agent_steps_trained: 1766000
    num_steps_sampled: 1766000
    num_steps_trained: 1766000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1766,47241.4,1766000,-28.389,-20.1,-50.1,283.89




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1767000
  custom_metrics: {}
  date: 2021-10-29_10-14-48
  done: false
  episode_len_mean: 278.74
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -27.874000000000123
  episode_reward_min: -42.600000000000335
  episodes_this_iter: 4
  episodes_total: 5853
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8650675154521216
          cur_lr: 5.000000000000001e-05
          entropy: 0.7171133667230606
          entropy_coeff: 0.009999999999999998
          kl: 0.008162618244127234
          policy_loss: -0.053493387252092364
          total_loss: 0.9926466716660394
          vf_explained_var: 0.5467017292976379
          vf_loss: 1.0462499717871347
    num_agent_steps_sampled: 1767000
    num_agent_steps_trained: 1767000
    num_steps_sampled: 1767000
    num_steps_trained: 176700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1767,47290.2,1767000,-27.874,-19.2,-42.6,278.74


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1768000
  custom_metrics: {}
  date: 2021-10-29_10-15-16
  done: false
  episode_len_mean: 277.66
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -27.766000000000123
  episode_reward_min: -42.600000000000335
  episodes_this_iter: 4
  episodes_total: 5857
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8650675154521216
          cur_lr: 5.000000000000001e-05
          entropy: 1.1344959865013757
          entropy_coeff: 0.009999999999999998
          kl: 0.007342483293686542
          policy_loss: 0.04730674227078756
          total_loss: 0.76444397535589
          vf_explained_var: 0.6665109992027283
          vf_loss: 0.7221304555733998
    num_agent_steps_sampled: 1768000
    num_agent_steps_trained: 1768000
    num_steps_sampled: 1768000
    num_steps_trained: 1768000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1768,47317.9,1768000,-27.766,-19.2,-42.6,277.66


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1769000
  custom_metrics: {}
  date: 2021-10-29_10-15-43
  done: false
  episode_len_mean: 277.8
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -27.780000000000122
  episode_reward_min: -42.600000000000335
  episodes_this_iter: 3
  episodes_total: 5860
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8650675154521216
          cur_lr: 5.000000000000001e-05
          entropy: 1.3094840844472249
          entropy_coeff: 0.009999999999999998
          kl: 0.0077117712973394726
          policy_loss: -0.07629615246421761
          total_loss: 0.8580914732482698
          vf_explained_var: 0.2264346182346344
          vf_loss: 0.9408112628592386
    num_agent_steps_sampled: 1769000
    num_agent_steps_trained: 1769000
    num_steps_sampled: 1769000
    num_steps_trained: 1769000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1769,47344.7,1769000,-27.78,-19.2,-42.6,277.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1770000
  custom_metrics: {}
  date: 2021-10-29_10-16-12
  done: false
  episode_len_mean: 276.1
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -27.610000000000117
  episode_reward_min: -42.600000000000335
  episodes_this_iter: 4
  episodes_total: 5864
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8650675154521216
          cur_lr: 5.000000000000001e-05
          entropy: 1.2773746788501739
          entropy_coeff: 0.009999999999999998
          kl: 0.01119780042829489
          policy_loss: -0.029180695157912044
          total_loss: 1.1945988436539967
          vf_explained_var: 0.1675131916999817
          vf_loss: 1.226866426732805
    num_agent_steps_sampled: 1770000
    num_agent_steps_trained: 1770000
    num_steps_sampled: 1770000
    num_steps_trained: 1770000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1770,47373.3,1770000,-27.61,-19.2,-42.6,276.1


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1771000
  custom_metrics: {}
  date: 2021-10-29_10-16-43
  done: false
  episode_len_mean: 272.99
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -27.29900000000012
  episode_reward_min: -42.600000000000335
  episodes_this_iter: 4
  episodes_total: 5868
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.8650675154521216
          cur_lr: 5.000000000000001e-05
          entropy: 0.7893763393163681
          entropy_coeff: 0.009999999999999998
          kl: 0.003061019055381959
          policy_loss: -0.06763880732986662
          total_loss: 1.092506707376904
          vf_explained_var: 0.44740748405456543
          vf_loss: 1.1653912934992048
    num_agent_steps_sampled: 1771000
    num_agent_steps_trained: 1771000
    num_steps_sampled: 1771000
    num_steps_trained: 1771000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1771,47404.9,1771000,-27.299,-19.2,-42.6,272.99


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1772000
  custom_metrics: {}
  date: 2021-10-29_10-17-15
  done: false
  episode_len_mean: 269.82
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -26.982000000000113
  episode_reward_min: -42.600000000000335
  episodes_this_iter: 5
  episodes_total: 5873
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4325337577260608
          cur_lr: 5.000000000000001e-05
          entropy: 0.3497866436839104
          entropy_coeff: 0.009999999999999998
          kl: 0.004994543378697674
          policy_loss: -0.042145827247036825
          total_loss: 1.527727973461151
          vf_explained_var: 0.24502667784690857
          vf_loss: 1.571211338043213
    num_agent_steps_sampled: 1772000
    num_agent_steps_trained: 1772000
    num_steps_sampled: 1772000
    num_steps_trained: 1772000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1772,47436.2,1772000,-26.982,-19.2,-42.6,269.82


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1773000
  custom_metrics: {}
  date: 2021-10-29_10-17-46
  done: false
  episode_len_mean: 266.0
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -26.600000000000104
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5877
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2162668788630304
          cur_lr: 5.000000000000001e-05
          entropy: 1.443040625254313
          entropy_coeff: 0.009999999999999998
          kl: 0.0215805607871131
          policy_loss: -0.01597305089235306
          total_loss: 0.5962301856941647
          vf_explained_var: 0.7229044437408447
          vf_loss: 0.6219664749171999
    num_agent_steps_sampled: 1773000
    num_agent_steps_trained: 1773000
    num_steps_sampled: 1773000
    num_steps_trained: 1773000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1773,47467.3,1773000,-26.6,-19.2,-38.1,266




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1774000
  custom_metrics: {}
  date: 2021-10-29_10-18-36
  done: false
  episode_len_mean: 262.71
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -26.271000000000107
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5881
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32440031829454563
          cur_lr: 5.000000000000001e-05
          entropy: 0.7710172795587116
          entropy_coeff: 0.009999999999999998
          kl: 0.009950672177270152
          policy_loss: 0.0501451620625125
          total_loss: 0.914035975933075
          vf_explained_var: 0.5755307674407959
          vf_loss: 0.8683729966481527
    num_agent_steps_sampled: 1774000
    num_agent_steps_trained: 1774000
    num_steps_sampled: 1774000
    num_steps_trained: 1774000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1774,47517.8,1774000,-26.271,-19.2,-38.1,262.71


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1775000
  custom_metrics: {}
  date: 2021-10-29_10-19-05
  done: false
  episode_len_mean: 260.6
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -26.0600000000001
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5885
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32440031829454563
          cur_lr: 5.000000000000001e-05
          entropy: 1.0455744822820028
          entropy_coeff: 0.009999999999999998
          kl: 0.00609966476199825
          policy_loss: -0.05762459370825026
          total_loss: 0.7257457130485111
          vf_explained_var: 0.6188516616821289
          vf_loss: 0.7918473217222426
    num_agent_steps_sampled: 1775000
    num_agent_steps_trained: 1775000
    num_steps_sampled: 1775000
    num_steps_trained: 1775000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1775,47546.2,1775000,-26.06,-19.2,-38.1,260.6


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1776000
  custom_metrics: {}
  date: 2021-10-29_10-19-33
  done: false
  episode_len_mean: 260.47
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -26.047000000000104
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5889
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32440031829454563
          cur_lr: 5.000000000000001e-05
          entropy: 1.4753819876246983
          entropy_coeff: 0.009999999999999998
          kl: 0.013267062301195114
          policy_loss: 0.009203292760584089
          total_loss: 0.8584610998630524
          vf_explained_var: 0.6183509826660156
          vf_loss: 0.8597077906131745
    num_agent_steps_sampled: 1776000
    num_agent_steps_trained: 1776000
    num_steps_sampled: 1776000
    num_steps_trained: 1776000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1776,47574.7,1776000,-26.047,-19.2,-38.1,260.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1777000
  custom_metrics: {}
  date: 2021-10-29_10-20-02
  done: false
  episode_len_mean: 259.32
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.9320000000001
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5893
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32440031829454563
          cur_lr: 5.000000000000001e-05
          entropy: 0.7353007458978229
          entropy_coeff: 0.009999999999999998
          kl: 0.00975023929679403
          policy_loss: -0.03504778014289008
          total_loss: 0.9658470392227173
          vf_explained_var: 0.35336631536483765
          vf_loss: 1.0050848417811924
    num_agent_steps_sampled: 1777000
    num_agent_steps_trained: 1777000
    num_steps_sampled: 1777000
    num_steps_trained: 1777000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1777,47603.8,1777000,-25.932,-19.2,-38.1,259.32


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1778000
  custom_metrics: {}
  date: 2021-10-29_10-20-33
  done: false
  episode_len_mean: 259.75
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.9750000000001
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5897
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32440031829454563
          cur_lr: 5.000000000000001e-05
          entropy: 1.0651769293679132
          entropy_coeff: 0.009999999999999998
          kl: 0.00630122251395188
          policy_loss: 0.025999958564837774
          total_loss: 0.8916621698273552
          vf_explained_var: 0.4035099446773529
          vf_loss: 0.8742698691785336
    num_agent_steps_sampled: 1778000
    num_agent_steps_trained: 1778000
    num_steps_sampled: 1778000
    num_steps_trained: 1778000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1778,47634.1,1778000,-25.975,-19.2,-38.1,259.75


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1779000
  custom_metrics: {}
  date: 2021-10-29_10-21-00
  done: false
  episode_len_mean: 260.43
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -26.043000000000102
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 3
  episodes_total: 5900
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32440031829454563
          cur_lr: 5.000000000000001e-05
          entropy: 1.4443690909279718
          entropy_coeff: 0.009999999999999998
          kl: 0.010533735543600879
          policy_loss: 0.008660102677014139
          total_loss: 0.5057210488451852
          vf_explained_var: 0.6515520811080933
          vf_loss: 0.5080874896711773
    num_agent_steps_sampled: 1779000
    num_agent_steps_trained: 1779000
    num_steps_sampled: 1779000
    num_steps_trained: 1779000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1779,47661.1,1779000,-26.043,-19.2,-38.1,260.43


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1780000
  custom_metrics: {}
  date: 2021-10-29_10-21-28
  done: false
  episode_len_mean: 259.21
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.9210000000001
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5904
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32440031829454563
          cur_lr: 5.000000000000001e-05
          entropy: 1.0735146476162805
          entropy_coeff: 0.009999999999999998
          kl: 0.01575031827414569
          policy_loss: 0.041753761139180924
          total_loss: 0.5720471936795447
          vf_explained_var: 0.6950839757919312
          vf_loss: 0.5359191629621718
    num_agent_steps_sampled: 1780000
    num_agent_steps_trained: 1780000
    num_steps_sampled: 1780000
    num_steps_trained: 1780000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1780,47689.8,1780000,-25.921,-19.2,-38.1,259.21


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1781000
  custom_metrics: {}
  date: 2021-10-29_10-21-58
  done: false
  episode_len_mean: 258.11
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.81100000000009
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5908
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32440031829454563
          cur_lr: 5.000000000000001e-05
          entropy: 0.85415112276872
          entropy_coeff: 0.009999999999999998
          kl: 0.022291465823780537
          policy_loss: 0.07536860410537985
          total_loss: 0.7511528104543685
          vf_explained_var: 0.432168185710907
          vf_loss: 0.6770943658219444
    num_agent_steps_sampled: 1781000
    num_agent_steps_trained: 1781000
    num_steps_sampled: 1781000
    num_steps_trained: 1781000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1781,47719.2,1781000,-25.811,-19.2,-38.1,258.11




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1782000
  custom_metrics: {}
  date: 2021-10-29_10-22-44
  done: false
  episode_len_mean: 257.39
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.739000000000093
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5912
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48660047744181834
          cur_lr: 5.000000000000001e-05
          entropy: 1.4186745882034302
          entropy_coeff: 0.009999999999999998
          kl: 0.010562907628515935
          policy_loss: 0.023797544547253184
          total_loss: 0.865731010834376
          vf_explained_var: 0.6186986565589905
          vf_loss: 0.8509802798430125
    num_agent_steps_sampled: 1782000
    num_agent_steps_trained: 1782000
    num_steps_sampled: 1782000
    num_steps_trained: 1782000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1782,47765,1782000,-25.739,-19.2,-38.1,257.39


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1783000
  custom_metrics: {}
  date: 2021-10-29_10-23-12
  done: false
  episode_len_mean: 256.62
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.6620000000001
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5916
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48660047744181834
          cur_lr: 5.000000000000001e-05
          entropy: 1.2008600221739876
          entropy_coeff: 0.009999999999999998
          kl: 0.009669630570307201
          policy_loss: -0.005573354495896234
          total_loss: 0.8786298473676045
          vf_explained_var: 0.5751461982727051
          vf_loss: 0.8915065556764603
    num_agent_steps_sampled: 1783000
    num_agent_steps_trained: 1783000
    num_steps_sampled: 1783000
    num_steps_trained: 1783000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1783,47793.1,1783000,-25.662,-19.2,-38.1,256.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1784000
  custom_metrics: {}
  date: 2021-10-29_10-23-40
  done: false
  episode_len_mean: 257.07
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.707000000000097
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5920
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48660047744181834
          cur_lr: 5.000000000000001e-05
          entropy: 0.8788356135288874
          entropy_coeff: 0.009999999999999998
          kl: 0.007880830463434386
          policy_loss: 0.06946301741732491
          total_loss: 1.037991926405165
          vf_explained_var: 0.5198920965194702
          vf_loss: 0.9734824422332976
    num_agent_steps_sampled: 1784000
    num_agent_steps_trained: 1784000
    num_steps_sampled: 1784000
    num_steps_trained: 1784000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1784,47821.6,1784000,-25.707,-19.2,-38.1,257.07


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1785000
  custom_metrics: {}
  date: 2021-10-29_10-24-11
  done: false
  episode_len_mean: 254.81
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.481000000000094
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5924
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48660047744181834
          cur_lr: 5.000000000000001e-05
          entropy: 0.5855007206400236
          entropy_coeff: 0.009999999999999998
          kl: 0.009286446556014185
          policy_loss: 0.07801376564635171
          total_loss: 0.575818713174926
          vf_explained_var: 0.718937337398529
          vf_loss: 0.499141158329116
    num_agent_steps_sampled: 1785000
    num_agent_steps_trained: 1785000
    num_steps_sampled: 1785000
    num_steps_trained: 1785000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1785,47852.7,1785000,-25.481,-19.2,-38.1,254.81


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1786000
  custom_metrics: {}
  date: 2021-10-29_10-24-45
  done: false
  episode_len_mean: 254.21
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.42100000000009
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5928
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.48660047744181834
          cur_lr: 5.000000000000001e-05
          entropy: 0.542555835015244
          entropy_coeff: 0.009999999999999998
          kl: 0.0035901813901040845
          policy_loss: 0.0626068674855762
          total_loss: 0.6569150315390693
          vf_explained_var: 0.5728689432144165
          vf_loss: 0.5979867416951391
    num_agent_steps_sampled: 1786000
    num_agent_steps_trained: 1786000
    num_steps_sampled: 1786000
    num_steps_trained: 1786000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1786,47886.4,1786000,-25.421,-19.2,-38.1,254.21


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1787000
  custom_metrics: {}
  date: 2021-10-29_10-25-18
  done: false
  episode_len_mean: 253.19
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.319000000000095
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 5
  episodes_total: 5933
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24330023872090917
          cur_lr: 5.000000000000001e-05
          entropy: 0.5626838728785515
          entropy_coeff: 0.009999999999999998
          kl: 0.019293678529670868
          policy_loss: -0.02921154292093383
          total_loss: 1.2545165187782712
          vf_explained_var: 0.370900422334671
          vf_loss: 1.284660757250256
    num_agent_steps_sampled: 1787000
    num_agent_steps_trained: 1787000
    num_steps_sampled: 1787000
    num_steps_trained: 1787000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1787,47918.8,1787000,-25.319,-19.2,-38.1,253.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1788000
  custom_metrics: {}
  date: 2021-10-29_10-25-46
  done: false
  episode_len_mean: 253.56
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.35600000000009
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 3
  episodes_total: 5936
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24330023872090917
          cur_lr: 5.000000000000001e-05
          entropy: 0.8464889576037725
          entropy_coeff: 0.009999999999999998
          kl: 0.01550053386085612
          policy_loss: -0.13114283151096767
          total_loss: 0.6061415010028415
          vf_explained_var: 0.4919603168964386
          vf_loss: 0.7419779393408034
    num_agent_steps_sampled: 1788000
    num_agent_steps_trained: 1788000
    num_steps_sampled: 1788000
    num_steps_trained: 1788000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1788,47947.4,1788000,-25.356,-19.2,-38.1,253.56




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1789000
  custom_metrics: {}
  date: 2021-10-29_10-26-32
  done: false
  episode_len_mean: 252.85
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.285000000000093
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5940
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24330023872090917
          cur_lr: 5.000000000000001e-05
          entropy: 0.9943498316738341
          entropy_coeff: 0.009999999999999998
          kl: 0.014112155772160564
          policy_loss: -0.15721868748466175
          total_loss: 0.637542524933815
          vf_explained_var: 0.6952741146087646
          vf_loss: 0.8012712270021438
    num_agent_steps_sampled: 1789000
    num_agent_steps_trained: 1789000
    num_steps_sampled: 1789000
    num_steps_trained: 1789000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1789,47993.7,1789000,-25.285,-19.2,-38.1,252.85


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1790000
  custom_metrics: {}
  date: 2021-10-29_10-27-02
  done: false
  episode_len_mean: 252.88
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.288000000000093
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5944
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.24330023872090917
          cur_lr: 5.000000000000001e-05
          entropy: 0.9072257025374306
          entropy_coeff: 0.009999999999999998
          kl: 0.026372672242793112
          policy_loss: -0.053351006739669374
          total_loss: 0.6293096943034067
          vf_explained_var: 0.6455326676368713
          vf_loss: 0.6853164798683591
    num_agent_steps_sampled: 1790000
    num_agent_steps_trained: 1790000
    num_steps_sampled: 1790000
    num_steps_trained: 179000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1790,48023.1,1790000,-25.288,-19.2,-38.1,252.88


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1791000
  custom_metrics: {}
  date: 2021-10-29_10-27-29
  done: false
  episode_len_mean: 252.87
  episode_media: {}
  episode_reward_max: -19.200000000000003
  episode_reward_mean: -25.287000000000095
  episode_reward_min: -38.10000000000027
  episodes_this_iter: 4
  episodes_total: 5948
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3649503580813637
          cur_lr: 5.000000000000001e-05
          entropy: 0.969796841012107
          entropy_coeff: 0.009999999999999998
          kl: 0.011792288702173989
          policy_loss: 0.006887048400110668
          total_loss: 0.6754575113455454
          vf_explained_var: 0.556093692779541
          vf_loss: 0.67396483172973
    num_agent_steps_sampled: 1791000
    num_agent_steps_trained: 1791000
    num_steps_sampled: 1791000
    num_steps_trained: 1791000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1791,48050.3,1791000,-25.287,-19.2,-38.1,252.87


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1792000
  custom_metrics: {}
  date: 2021-10-29_10-27-59
  done: false
  episode_len_mean: 251.98
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -25.19800000000009
  episode_reward_min: -33.2000000000002
  episodes_this_iter: 4
  episodes_total: 5952
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3649503580813637
          cur_lr: 5.000000000000001e-05
          entropy: 0.6745148973332511
          entropy_coeff: 0.009999999999999998
          kl: 0.01712954767431793
          policy_loss: -0.060723903444078235
          total_loss: 0.7862712389893002
          vf_explained_var: 0.5705057978630066
          vf_loss: 0.8474888569778867
    num_agent_steps_sampled: 1792000
    num_agent_steps_trained: 1792000
    num_steps_sampled: 1792000
    num_steps_trained: 1792000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1792,48080.5,1792000,-25.198,-19.4,-33.2,251.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1793000
  custom_metrics: {}
  date: 2021-10-29_10-28-30
  done: false
  episode_len_mean: 251.98
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -25.198000000000082
  episode_reward_min: -33.2000000000002
  episodes_this_iter: 4
  episodes_total: 5956
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3649503580813637
          cur_lr: 5.000000000000001e-05
          entropy: 0.6489005769292514
          entropy_coeff: 0.009999999999999998
          kl: 0.01879276340802297
          policy_loss: 0.004075789203246434
          total_loss: 0.7649283670716815
          vf_explained_var: 0.5760058760643005
          vf_loss: 0.7604831463760799
    num_agent_steps_sampled: 1793000
    num_agent_steps_trained: 1793000
    num_steps_sampled: 1793000
    num_steps_trained: 1793000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1793,48110.7,1793000,-25.198,-19.4,-33.2,251.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1794000
  custom_metrics: {}
  date: 2021-10-29_10-29-00
  done: false
  episode_len_mean: 249.47
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -24.947000000000084
  episode_reward_min: -30.700000000000166
  episodes_this_iter: 5
  episodes_total: 5961
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3649503580813637
          cur_lr: 5.000000000000001e-05
          entropy: 0.6136664612425698
          entropy_coeff: 0.009999999999999998
          kl: 0.024373155509675195
          policy_loss: -0.07050229029523002
          total_loss: 0.7606506092680825
          vf_explained_var: 0.6008960008621216
          vf_loss: 0.8283945629994075
    num_agent_steps_sampled: 1794000
    num_agent_steps_trained: 1794000
    num_steps_sampled: 1794000
    num_steps_trained: 1794000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1794,48140.9,1794000,-24.947,-19.4,-30.7,249.47


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1795000
  custom_metrics: {}
  date: 2021-10-29_10-29-30
  done: false
  episode_len_mean: 248.84
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -24.884000000000082
  episode_reward_min: -30.700000000000166
  episodes_this_iter: 4
  episodes_total: 5965
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5474255371220454
          cur_lr: 5.000000000000001e-05
          entropy: 0.8062014506922828
          entropy_coeff: 0.009999999999999998
          kl: 0.01372696932983912
          policy_loss: -0.05244298610422346
          total_loss: 0.7090840611192916
          vf_explained_var: 0.39492082595825195
          vf_loss: 0.762074568702115
    num_agent_steps_sampled: 1795000
    num_agent_steps_trained: 1795000
    num_steps_sampled: 1795000
    num_steps_trained: 1795000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1795,48171.1,1795000,-24.884,-19.4,-30.7,248.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1796000
  custom_metrics: {}
  date: 2021-10-29_10-30-00
  done: false
  episode_len_mean: 248.9
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -24.890000000000082
  episode_reward_min: -30.700000000000166
  episodes_this_iter: 4
  episodes_total: 5969
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5474255371220454
          cur_lr: 5.000000000000001e-05
          entropy: 0.6456068494253688
          entropy_coeff: 0.009999999999999998
          kl: 0.0030188918965680816
          policy_loss: -0.05960482921865251
          total_loss: 0.7176567286252975
          vf_explained_var: 0.5334973335266113
          vf_loss: 0.7820650104019377
    num_agent_steps_sampled: 1796000
    num_agent_steps_trained: 1796000
    num_steps_sampled: 1796000
    num_steps_trained: 1796000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1796,48201.2,1796000,-24.89,-19.4,-30.7,248.9




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1797000
  custom_metrics: {}
  date: 2021-10-29_10-30-47
  done: false
  episode_len_mean: 249.98
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -24.99800000000009
  episode_reward_min: -30.700000000000166
  episodes_this_iter: 4
  episodes_total: 5973
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2737127685610227
          cur_lr: 5.000000000000001e-05
          entropy: 0.7270499504274792
          entropy_coeff: 0.009999999999999998
          kl: 0.00722447576338507
          policy_loss: 0.0068077336582872605
          total_loss: 0.8096969503495428
          vf_explained_var: 0.5629475712776184
          vf_loss: 0.808182285560502
    num_agent_steps_sampled: 1797000
    num_agent_steps_trained: 1797000
    num_steps_sampled: 1797000
    num_steps_trained: 1797000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1797,48247.7,1797000,-24.998,-19.4,-30.7,249.98


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1798000
  custom_metrics: {}
  date: 2021-10-29_10-31-14
  done: false
  episode_len_mean: 251.02
  episode_media: {}
  episode_reward_max: -19.400000000000006
  episode_reward_mean: -25.102000000000086
  episode_reward_min: -31.200000000000173
  episodes_this_iter: 3
  episodes_total: 5976
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2737127685610227
          cur_lr: 5.000000000000001e-05
          entropy: 1.3194591416252983
          entropy_coeff: 0.009999999999999998
          kl: 0.04801726043272952
          policy_loss: 0.019967089676194722
          total_loss: 0.7154009719689687
          vf_explained_var: 0.5680352449417114
          vf_loss: 0.6954855233430862
    num_agent_steps_sampled: 1798000
    num_agent_steps_trained: 1798000
    num_steps_sampled: 1798000
    num_steps_trained: 1798000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1798,48275,1798000,-25.102,-19.4,-31.2,251.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1799000
  custom_metrics: {}
  date: 2021-10-29_10-31-41
  done: false
  episode_len_mean: 252.52
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.252000000000084
  episode_reward_min: -31.200000000000173
  episodes_this_iter: 4
  episodes_total: 5980
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4105691528415341
          cur_lr: 5.000000000000001e-05
          entropy: 1.4072043425507015
          entropy_coeff: 0.009999999999999998
          kl: 0.020909991866568275
          policy_loss: -0.003748040646314621
          total_loss: 0.9393995013501909
          vf_explained_var: 0.3912389576435089
          vf_loss: 0.9486345973279741
    num_agent_steps_sampled: 1799000
    num_agent_steps_trained: 1799000
    num_steps_sampled: 1799000
    num_steps_trained: 1799000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1799,48302.1,1799000,-25.252,-19.6,-31.2,252.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1800000
  custom_metrics: {}
  date: 2021-10-29_10-32-06
  done: false
  episode_len_mean: 253.84
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.384000000000093
  episode_reward_min: -31.200000000000173
  episodes_this_iter: 3
  episodes_total: 5983
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6158537292623012
          cur_lr: 5.000000000000001e-05
          entropy: 1.3007266402244568
          entropy_coeff: 0.009999999999999998
          kl: 0.007195277171961973
          policy_loss: 0.027712113327450223
          total_loss: 0.9832161674896877
          vf_explained_var: -0.006793441250920296
          vf_loss: 0.964080085274246
    num_agent_steps_sampled: 1800000
    num_agent_steps_trained: 1800000
    num_steps_sampled: 1800000
    num_steps_trained: 180000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1800,48326.6,1800000,-25.384,-19.6,-31.2,253.84


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1801000
  custom_metrics: {}
  date: 2021-10-29_10-32-34
  done: false
  episode_len_mean: 254.36
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.43600000000009
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 5987
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6158537292623012
          cur_lr: 5.000000000000001e-05
          entropy: 1.1812590844101376
          entropy_coeff: 0.009999999999999998
          kl: 0.015188573279241445
          policy_loss: 0.0325067197283109
          total_loss: 0.9024423705206976
          vf_explained_var: 0.5294831395149231
          vf_loss: 0.8723942991760042
    num_agent_steps_sampled: 1801000
    num_agent_steps_trained: 1801000
    num_steps_sampled: 1801000
    num_steps_trained: 1801000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1801,48354.6,1801000,-25.436,-19.6,-32.9,254.36


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1802000
  custom_metrics: {}
  date: 2021-10-29_10-33-03
  done: false
  episode_len_mean: 253.77
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.377000000000088
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 5991
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6158537292623012
          cur_lr: 5.000000000000001e-05
          entropy: 0.7627467211749819
          entropy_coeff: 0.009999999999999998
          kl: 0.010112727299850613
          policy_loss: 0.09058780943353971
          total_loss: 0.8136783331632614
          vf_explained_var: 0.6020402312278748
          vf_loss: 0.7244900312688616
    num_agent_steps_sampled: 1802000
    num_agent_steps_trained: 1802000
    num_steps_sampled: 1802000
    num_steps_trained: 1802000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1802,48384.4,1802000,-25.377,-19.6,-32.9,253.77


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1803000
  custom_metrics: {}
  date: 2021-10-29_10-33-32
  done: false
  episode_len_mean: 253.37
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.337000000000092
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 5995
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6158537292623012
          cur_lr: 5.000000000000001e-05
          entropy: 1.391572716501024
          entropy_coeff: 0.009999999999999998
          kl: 0.010707005257603756
          policy_loss: 0.06662898518972926
          total_loss: 0.6022454334629906
          vf_explained_var: 0.6746701002120972
          vf_loss: 0.5429382271236843
    num_agent_steps_sampled: 1803000
    num_agent_steps_trained: 1803000
    num_steps_sampled: 1803000
    num_steps_trained: 1803000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1803,48413.3,1803000,-25.337,-19.6,-32.9,253.37


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1804000
  custom_metrics: {}
  date: 2021-10-29_10-34-03
  done: false
  episode_len_mean: 253.37
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.337000000000092
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 5999
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6158537292623012
          cur_lr: 5.000000000000001e-05
          entropy: 0.6300742500358157
          entropy_coeff: 0.009999999999999998
          kl: 0.011777034466145468
          policy_loss: -1.6405764553281995e-05
          total_loss: 0.7560829705662198
          vf_explained_var: 0.5774030685424805
          vf_loss: 0.7551471915509965
    num_agent_steps_sampled: 1804000
    num_agent_steps_trained: 1804000
    num_steps_sampled: 1804000
    num_steps_trained: 1804000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1804,48444.4,1804000,-25.337,-19.6,-32.9,253.37




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1805000
  custom_metrics: {}
  date: 2021-10-29_10-34-50
  done: false
  episode_len_mean: 252.42
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.24200000000008
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 6003
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6158537292623012
          cur_lr: 5.000000000000001e-05
          entropy: 0.5490123502082295
          entropy_coeff: 0.009999999999999998
          kl: 0.007859761617731052
          policy_loss: 0.002783056265778012
          total_loss: 0.876976748307546
          vf_explained_var: 0.522591769695282
          vf_loss: 0.8748433583312565
    num_agent_steps_sampled: 1805000
    num_agent_steps_trained: 1805000
    num_steps_sampled: 1805000
    num_steps_trained: 1805000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1805,48491.3,1805000,-25.242,-19.6,-32.9,252.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1806000
  custom_metrics: {}
  date: 2021-10-29_10-35-19
  done: false
  episode_len_mean: 252.62
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.262000000000086
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 6007
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6158537292623012
          cur_lr: 5.000000000000001e-05
          entropy: 1.3553041021029155
          entropy_coeff: 0.009999999999999998
          kl: 0.013988463754941765
          policy_loss: 0.09456954797108968
          total_loss: 0.6182597673601574
          vf_explained_var: 0.6189727187156677
          vf_loss: 0.5286284108956655
    num_agent_steps_sampled: 1806000
    num_agent_steps_trained: 1806000
    num_steps_sampled: 1806000
    num_steps_trained: 1806000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1806,48519.8,1806000,-25.262,-19.6,-32.9,252.62


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1807000
  custom_metrics: {}
  date: 2021-10-29_10-35-48
  done: false
  episode_len_mean: 252.42
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.242000000000086
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 6011
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6158537292623012
          cur_lr: 5.000000000000001e-05
          entropy: 1.2043113589286805
          entropy_coeff: 0.009999999999999998
          kl: 0.009606048584394837
          policy_loss: 0.006266919440693326
          total_loss: 0.6285674982600742
          vf_explained_var: 0.49742239713668823
          vf_loss: 0.6284277760320239
    num_agent_steps_sampled: 1807000
    num_agent_steps_trained: 1807000
    num_steps_sampled: 1807000
    num_steps_trained: 1807000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1807,48549.3,1807000,-25.242,-19.6,-32.9,252.42


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1808000
  custom_metrics: {}
  date: 2021-10-29_10-36-16
  done: false
  episode_len_mean: 252.75
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.27500000000008
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 6015
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6158537292623012
          cur_lr: 5.000000000000001e-05
          entropy: 1.1031364414427016
          entropy_coeff: 0.009999999999999998
          kl: 0.016649738226078972
          policy_loss: -0.08219741731882095
          total_loss: 0.7244022789928648
          vf_explained_var: 0.6386553049087524
          vf_loss: 0.807377259598838
    num_agent_steps_sampled: 1808000
    num_agent_steps_trained: 1808000
    num_steps_sampled: 1808000
    num_steps_trained: 1808000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1808,48577.1,1808000,-25.275,-19.6,-32.9,252.75


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1809000
  custom_metrics: {}
  date: 2021-10-29_10-36-45
  done: false
  episode_len_mean: 252.61
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.26100000000009
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 6019
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6158537292623012
          cur_lr: 5.000000000000001e-05
          entropy: 0.7626802232530382
          entropy_coeff: 0.009999999999999998
          kl: 0.011971805350393104
          policy_loss: -0.12845858310659727
          total_loss: 0.921510867940055
          vf_explained_var: 0.39672499895095825
          vf_loss: 1.0502233597967359
    num_agent_steps_sampled: 1809000
    num_agent_steps_trained: 1809000
    num_steps_sampled: 1809000
    num_steps_trained: 1809000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1809,48605.4,1809000,-25.261,-19.6,-32.9,252.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1810000
  custom_metrics: {}
  date: 2021-10-29_10-37-15
  done: false
  episode_len_mean: 252.52
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.252000000000088
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 6023
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6158537292623012
          cur_lr: 5.000000000000001e-05
          entropy: 0.6509061170948877
          entropy_coeff: 0.009999999999999998
          kl: 0.0035848283852428628
          policy_loss: -0.006421437859535218
          total_loss: 0.8816643721527524
          vf_explained_var: 0.5272409915924072
          vf_loss: 0.8923871543672349
    num_agent_steps_sampled: 1810000
    num_agent_steps_trained: 1810000
    num_steps_sampled: 1810000
    num_steps_trained: 1810000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1810,48636,1810000,-25.252,-19.6,-32.9,252.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1811000
  custom_metrics: {}
  date: 2021-10-29_10-37-48
  done: false
  episode_len_mean: 252.22
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.222000000000083
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 6027
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3079268646311506
          cur_lr: 5.000000000000001e-05
          entropy: 0.5961015817191866
          entropy_coeff: 0.009999999999999998
          kl: 0.008275568731318433
          policy_loss: 0.0037029942704571617
          total_loss: 1.1273530860741934
          vf_explained_var: 0.5164296627044678
          vf_loss: 1.1270628309912152
    num_agent_steps_sampled: 1811000
    num_agent_steps_trained: 1811000
    num_steps_sampled: 1811000
    num_steps_trained: 1811000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1811,48668.5,1811000,-25.222,-19.6,-32.9,252.22




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1812000
  custom_metrics: {}
  date: 2021-10-29_10-38-36
  done: false
  episode_len_mean: 252.52
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.252000000000088
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 6031
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3079268646311506
          cur_lr: 5.000000000000001e-05
          entropy: 0.6236234184768464
          entropy_coeff: 0.009999999999999998
          kl: 0.009623203667000505
          policy_loss: -0.12007130392723614
          total_loss: 1.0543321871095233
          vf_explained_var: 0.5916719436645508
          vf_loss: 1.177676468425327
    num_agent_steps_sampled: 1812000
    num_agent_steps_trained: 1812000
    num_steps_sampled: 1812000
    num_steps_trained: 1812000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1812,48716.8,1812000,-25.252,-19.6,-32.9,252.52


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1813000
  custom_metrics: {}
  date: 2021-10-29_10-39-06
  done: false
  episode_len_mean: 252.59
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.259000000000086
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 6035
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3079268646311506
          cur_lr: 5.000000000000001e-05
          entropy: 0.773346205883556
          entropy_coeff: 0.009999999999999998
          kl: 0.009159081183322747
          policy_loss: -0.11614096148146523
          total_loss: 0.5612297584613164
          vf_explained_var: 0.7404353618621826
          vf_loss: 0.6822838571336535
    num_agent_steps_sampled: 1813000
    num_agent_steps_trained: 1813000
    num_steps_sampled: 1813000
    num_steps_trained: 1813000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1813,48746.4,1813000,-25.259,-19.6,-32.9,252.59


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1814000
  custom_metrics: {}
  date: 2021-10-29_10-39-34
  done: false
  episode_len_mean: 252.46
  episode_media: {}
  episode_reward_max: -19.60000000000001
  episode_reward_mean: -25.246000000000084
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 6039
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3079268646311506
          cur_lr: 5.000000000000001e-05
          entropy: 1.343817200925615
          entropy_coeff: 0.009999999999999998
          kl: 0.018481321437027513
          policy_loss: 0.006420867227845722
          total_loss: 0.703250519434611
          vf_explained_var: 0.5607632994651794
          vf_loss: 0.7045769333839417
    num_agent_steps_sampled: 1814000
    num_agent_steps_trained: 1814000
    num_steps_sampled: 1814000
    num_steps_trained: 1814000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1814,48775.2,1814000,-25.246,-19.6,-32.9,252.46


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1815000
  custom_metrics: {}
  date: 2021-10-29_10-40-02
  done: false
  episode_len_mean: 253.57
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -25.357000000000088
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 4
  episodes_total: 6043
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3079268646311506
          cur_lr: 5.000000000000001e-05
          entropy: 1.380518505308363
          entropy_coeff: 0.009999999999999998
          kl: 0.010921100112594218
          policy_loss: -0.03319420359200902
          total_loss: 0.5350445846716563
          vf_explained_var: 0.6830958724021912
          vf_loss: 0.5786810729238722
    num_agent_steps_sampled: 1815000
    num_agent_steps_trained: 1815000
    num_steps_sampled: 1815000
    num_steps_trained: 1815000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1815,48802.3,1815000,-25.357,-20.7,-32.9,253.57


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1816000
  custom_metrics: {}
  date: 2021-10-29_10-40-28
  done: false
  episode_len_mean: 253.69
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -25.36900000000009
  episode_reward_min: -32.9000000000002
  episodes_this_iter: 3
  episodes_total: 6046
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3079268646311506
          cur_lr: 5.000000000000001e-05
          entropy: 1.3415097342597113
          entropy_coeff: 0.009999999999999998
          kl: 0.025010326474012086
          policy_loss: -0.07837302883466085
          total_loss: 0.9137292650010851
          vf_explained_var: 0.3883025348186493
          vf_loss: 0.9978160404496723
    num_agent_steps_sampled: 1816000
    num_agent_steps_trained: 1816000
    num_steps_sampled: 1816000
    num_steps_trained: 1816000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1816,48828.9,1816000,-25.369,-20.7,-32.9,253.69


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1817000
  custom_metrics: {}
  date: 2021-10-29_10-40-54
  done: false
  episode_len_mean: 255.53
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -25.55300000000009
  episode_reward_min: -33.3000000000002
  episodes_this_iter: 4
  episodes_total: 6050
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46189029694672595
          cur_lr: 5.000000000000001e-05
          entropy: 1.4576917674806382
          entropy_coeff: 0.009999999999999998
          kl: 0.011392596331319914
          policy_loss: -0.0037088495161798265
          total_loss: 0.7023644536733628
          vf_explained_var: 0.48840683698654175
          vf_loss: 0.7153880804777145
    num_agent_steps_sampled: 1817000
    num_agent_steps_trained: 1817000
    num_steps_sampled: 1817000
    num_steps_trained: 181700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1817,48855,1817000,-25.553,-20.7,-33.3,255.53


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1818000
  custom_metrics: {}
  date: 2021-10-29_10-41-17
  done: false
  episode_len_mean: 257.96
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -25.796000000000095
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 6053
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46189029694672595
          cur_lr: 5.000000000000001e-05
          entropy: 1.3263086292478774
          entropy_coeff: 0.009999999999999998
          kl: 0.018876793525654356
          policy_loss: 0.03769811557398902
          total_loss: 0.43351708435349995
          vf_explained_var: 0.776943027973175
          vf_loss: 0.40036304609643086
    num_agent_steps_sampled: 1818000
    num_agent_steps_trained: 1818000
    num_steps_sampled: 1818000
    num_steps_trained: 181800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1818,48878,1818000,-25.796,-20.7,-41.8,257.96


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1819000
  custom_metrics: {}
  date: 2021-10-29_10-41-45
  done: false
  episode_len_mean: 259.85
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -25.985000000000095
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 6057
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46189029694672595
          cur_lr: 5.000000000000001e-05
          entropy: 1.251600044965744
          entropy_coeff: 0.009999999999999998
          kl: 0.015384243081370385
          policy_loss: 0.043503559960259335
          total_loss: 0.698883522550265
          vf_explained_var: 0.550715446472168
          vf_loss: 0.6607901248666975
    num_agent_steps_sampled: 1819000
    num_agent_steps_trained: 1819000
    num_steps_sampled: 1819000
    num_steps_trained: 1819000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1819,48905.3,1819000,-25.985,-20.7,-41.8,259.85




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1820000
  custom_metrics: {}
  date: 2021-10-29_10-42-28
  done: false
  episode_len_mean: 260.03
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.003000000000096
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 6060
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46189029694672595
          cur_lr: 5.000000000000001e-05
          entropy: 1.082119193341997
          entropy_coeff: 0.009999999999999998
          kl: 0.013143453858022743
          policy_loss: -0.13429243316253026
          total_loss: 0.9576490759849549
          vf_explained_var: 0.2998679578304291
          vf_loss: 1.0966918680402968
    num_agent_steps_sampled: 1820000
    num_agent_steps_trained: 1820000
    num_steps_sampled: 1820000
    num_steps_trained: 1820000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1820,48948.7,1820000,-26.003,-20.7,-41.8,260.03


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1821000
  custom_metrics: {}
  date: 2021-10-29_10-42-59
  done: false
  episode_len_mean: 261.61
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.1610000000001
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 6064
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46189029694672595
          cur_lr: 5.000000000000001e-05
          entropy: 1.181730071703593
          entropy_coeff: 0.009999999999999998
          kl: 0.008445126227669103
          policy_loss: 0.043129738171895346
          total_loss: 0.7506891363196903
          vf_explained_var: 0.43081140518188477
          vf_loss: 0.7154759744803111
    num_agent_steps_sampled: 1821000
    num_agent_steps_trained: 1821000
    num_steps_sampled: 1821000
    num_steps_trained: 1821000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1821,48979.9,1821000,-26.161,-20.7,-41.8,261.61


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1822000
  custom_metrics: {}
  date: 2021-10-29_10-43-28
  done: false
  episode_len_mean: 262.43
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.243000000000105
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 6068
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46189029694672595
          cur_lr: 5.000000000000001e-05
          entropy: 1.1134645726945664
          entropy_coeff: 0.009999999999999998
          kl: 0.009973937270466927
          policy_loss: 0.021855126693844797
          total_loss: 0.7573262724611495
          vf_explained_var: 0.43569809198379517
          vf_loss: 0.7419989274607764
    num_agent_steps_sampled: 1822000
    num_agent_steps_trained: 1822000
    num_steps_sampled: 1822000
    num_steps_trained: 18220

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1822,49009,1822000,-26.243,-20.7,-41.8,262.43


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1823000
  custom_metrics: {}
  date: 2021-10-29_10-43-56
  done: false
  episode_len_mean: 263.25
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.3250000000001
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 6072
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46189029694672595
          cur_lr: 5.000000000000001e-05
          entropy: 1.3425018337037828
          entropy_coeff: 0.009999999999999998
          kl: 0.006653346503204342
          policy_loss: -0.009557583348618613
          total_loss: 0.7265579915708966
          vf_explained_var: 0.4362391233444214
          vf_loss: 0.74646747989787
    num_agent_steps_sampled: 1823000
    num_agent_steps_trained: 1823000
    num_steps_sampled: 1823000
    num_steps_trained: 1823000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1823,49036.2,1823000,-26.325,-20.7,-41.8,263.25


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1824000
  custom_metrics: {}
  date: 2021-10-29_10-44-24
  done: false
  episode_len_mean: 262.02
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.2020000000001
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 6075
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46189029694672595
          cur_lr: 5.000000000000001e-05
          entropy: 1.1967723257011837
          entropy_coeff: 0.009999999999999998
          kl: 0.014359648544875923
          policy_loss: -0.12614664692017769
          total_loss: 1.0160954362816281
          vf_explained_var: 0.33774062991142273
          vf_loss: 1.1475772268242306
    num_agent_steps_sampled: 1824000
    num_agent_steps_trained: 1824000
    num_steps_sampled: 1824000
    num_steps_trained: 1824000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1824,49064.1,1824000,-26.202,-20.7,-41.8,262.02


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1825000
  custom_metrics: {}
  date: 2021-10-29_10-44-53
  done: false
  episode_len_mean: 262.38
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.2380000000001
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 6079
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46189029694672595
          cur_lr: 5.000000000000001e-05
          entropy: 1.244522307978736
          entropy_coeff: 0.009999999999999998
          kl: 0.01364709904007005
          policy_loss: -0.02001671517888705
          total_loss: 0.9898370524247487
          vf_explained_var: 0.3227999210357666
          vf_loss: 1.0159955306185617
    num_agent_steps_sampled: 1825000
    num_agent_steps_trained: 1825000
    num_steps_sampled: 1825000
    num_steps_trained: 1825000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1825,49094,1825000,-26.238,-20.7,-41.8,262.38


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1826000
  custom_metrics: {}
  date: 2021-10-29_10-45-20
  done: false
  episode_len_mean: 261.19
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.119000000000106
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 6083
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.46189029694672595
          cur_lr: 5.000000000000001e-05
          entropy: 1.2066406713591682
          entropy_coeff: 0.009999999999999998
          kl: 0.025995133476815764
          policy_loss: 0.023230644398265415
          total_loss: 0.884086047940784
          vf_explained_var: 0.3639562726020813
          vf_loss: 0.8609149214294222
    num_agent_steps_sampled: 1826000
    num_agent_steps_trained: 1826000
    num_steps_sampled: 1826000
    num_steps_trained: 1826000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1826,49120.6,1826000,-26.119,-20.7,-41.8,261.19


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1827000
  custom_metrics: {}
  date: 2021-10-29_10-45-43
  done: false
  episode_len_mean: 263.07
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.30700000000011
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 6086
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6928354454200889
          cur_lr: 5.000000000000001e-05
          entropy: 1.400440204805798
          entropy_coeff: 0.009999999999999998
          kl: 0.007597061326321687
          policy_loss: 0.035031106571356455
          total_loss: 0.6781217124727037
          vf_explained_var: 0.47248318791389465
          vf_loss: 0.6518314925332864
    num_agent_steps_sampled: 1827000
    num_agent_steps_trained: 1827000
    num_steps_sampled: 1827000
    num_steps_trained: 1827000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1827,49143.5,1827000,-26.307,-20.7,-41.8,263.07




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1828000
  custom_metrics: {}
  date: 2021-10-29_10-46-27
  done: false
  episode_len_mean: 263.83
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.383000000000106
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 6090
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6928354454200889
          cur_lr: 5.000000000000001e-05
          entropy: 1.5109814431932238
          entropy_coeff: 0.009999999999999998
          kl: 0.03150925674685537
          policy_loss: 0.0007224669886959923
          total_loss: 0.7826739355921746
          vf_explained_var: 0.4616565704345703
          vf_loss: 0.7752305520905389
    num_agent_steps_sampled: 1828000
    num_agent_steps_trained: 1828000
    num_steps_sampled: 1828000
    num_steps_trained: 1828000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1828,49187.7,1828000,-26.383,-20.7,-41.8,263.83


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1829000
  custom_metrics: {}
  date: 2021-10-29_10-46-55
  done: false
  episode_len_mean: 264.51
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.451000000000107
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 6093
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0392531681301336
          cur_lr: 5.000000000000001e-05
          entropy: 1.3273267818821801
          entropy_coeff: 0.009999999999999998
          kl: 0.006475403605254115
          policy_loss: 0.07245072565144962
          total_loss: 0.6570278359784021
          vf_explained_var: 0.6606254577636719
          vf_loss: 0.5911207870476776
    num_agent_steps_sampled: 1829000
    num_agent_steps_trained: 1829000
    num_steps_sampled: 1829000
    num_steps_trained: 1829000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1829,49215.6,1829000,-26.451,-20.7,-41.8,264.51


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1830000
  custom_metrics: {}
  date: 2021-10-29_10-47-24
  done: false
  episode_len_mean: 264.8
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.480000000000103
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 6097
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0392531681301336
          cur_lr: 5.000000000000001e-05
          entropy: 1.3616055369377136
          entropy_coeff: 0.009999999999999998
          kl: 0.006435585499966281
          policy_loss: 0.02544059082865715
          total_loss: 0.546783177057902
          vf_explained_var: 0.736698567867279
          vf_loss: 0.5282704379823473
    num_agent_steps_sampled: 1830000
    num_agent_steps_trained: 1830000
    num_steps_sampled: 1830000
    num_steps_trained: 1830000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1830,49243.9,1830000,-26.48,-20.7,-41.8,264.8


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1831000
  custom_metrics: {}
  date: 2021-10-29_10-47-54
  done: false
  episode_len_mean: 264.58
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.458000000000112
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 6101
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0392531681301336
          cur_lr: 5.000000000000001e-05
          entropy: 0.9771586583720313
          entropy_coeff: 0.009999999999999998
          kl: 0.0034674699530812506
          policy_loss: 0.05139245246019628
          total_loss: 0.7783299994137552
          vf_explained_var: 0.5219407677650452
          vf_loss: 0.7331055555078718
    num_agent_steps_sampled: 1831000
    num_agent_steps_trained: 1831000
    num_steps_sampled: 1831000
    num_steps_trained: 1831000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1831,49274.4,1831000,-26.458,-20.7,-41.8,264.58


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1832000
  custom_metrics: {}
  date: 2021-10-29_10-48-23
  done: false
  episode_len_mean: 265.17
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.517000000000106
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 6105
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5196265840650668
          cur_lr: 5.000000000000001e-05
          entropy: 1.4440585878160266
          entropy_coeff: 0.009999999999999998
          kl: 0.011334807405353016
          policy_loss: 0.029785480350255966
          total_loss: 1.0170914264188873
          vf_explained_var: 0.4103902578353882
          vf_loss: 0.9958566784030861
    num_agent_steps_sampled: 1832000
    num_agent_steps_trained: 1832000
    num_steps_sampled: 1832000
    num_steps_trained: 1832000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1832,49303.1,1832000,-26.517,-20.7,-41.8,265.17


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1833000
  custom_metrics: {}
  date: 2021-10-29_10-48-52
  done: false
  episode_len_mean: 265.24
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.524000000000104
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 6109
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5196265840650668
          cur_lr: 5.000000000000001e-05
          entropy: 0.9600883940855662
          entropy_coeff: 0.009999999999999998
          kl: 0.013868701340150637
          policy_loss: 0.09150059289402432
          total_loss: 0.5711259698702229
          vf_explained_var: 0.8935920596122742
          vf_loss: 0.48201972196499504
    num_agent_steps_sampled: 1833000
    num_agent_steps_trained: 1833000
    num_steps_sampled: 1833000
    num_steps_trained: 1833000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1833,49332.1,1833000,-26.524,-20.7,-41.8,265.24


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1834000
  custom_metrics: {}
  date: 2021-10-29_10-49-19
  done: false
  episode_len_mean: 266.08
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.608000000000114
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 3
  episodes_total: 6112
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5196265840650668
          cur_lr: 5.000000000000001e-05
          entropy: 1.1089729554123349
          entropy_coeff: 0.009999999999999998
          kl: 0.009327165059966685
          policy_loss: -0.10399125532971489
          total_loss: 1.1030652400520113
          vf_explained_var: 0.427808552980423
          vf_loss: 1.2132995802495214
    num_agent_steps_sampled: 1834000
    num_agent_steps_trained: 1834000
    num_steps_sampled: 1834000
    num_steps_trained: 1834000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1834,49359.3,1834000,-26.608,-20.7,-41.8,266.08


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1835000
  custom_metrics: {}
  date: 2021-10-29_10-49-46
  done: false
  episode_len_mean: 266.28
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.62800000000011
  episode_reward_min: -41.800000000000324
  episodes_this_iter: 4
  episodes_total: 6116
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5196265840650668
          cur_lr: 5.000000000000001e-05
          entropy: 1.358435316880544
          entropy_coeff: 0.009999999999999998
          kl: 0.013465810192064253
          policy_loss: -0.03511699338754018
          total_loss: 0.7120390345652898
          vf_explained_var: 0.488232284784317
          vf_loss: 0.7537431902355618
    num_agent_steps_sampled: 1835000
    num_agent_steps_trained: 1835000
    num_steps_sampled: 1835000
    num_steps_trained: 1835000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1835,49386.7,1835000,-26.628,-20.7,-41.8,266.28


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1836000
  custom_metrics: {}
  date: 2021-10-29_10-50-05
  done: false
  episode_len_mean: 269.91
  episode_media: {}
  episode_reward_max: -20.700000000000024
  episode_reward_mean: -26.99100000000011
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 3
  episodes_total: 6119
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5196265840650668
          cur_lr: 5.000000000000001e-05
          entropy: 1.472521718343099
          entropy_coeff: 0.009999999999999998
          kl: 0.011399540603233246
          policy_loss: 0.09450828292303615
          total_loss: 1.0286898814969592
          vf_explained_var: 0.44163376092910767
          vf_loss: 0.9429833190308676
    num_agent_steps_sampled: 1836000
    num_agent_steps_trained: 1836000
    num_steps_sampled: 1836000
    num_steps_trained: 1836000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1836,49405.6,1836000,-26.991,-20.7,-45.7,269.91




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1837000
  custom_metrics: {}
  date: 2021-10-29_10-50-51
  done: false
  episode_len_mean: 269.23
  episode_media: {}
  episode_reward_max: -19.70000000000001
  episode_reward_mean: -26.923000000000112
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 3
  episodes_total: 6122
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5196265840650668
          cur_lr: 5.000000000000001e-05
          entropy: 1.0996319313844045
          entropy_coeff: 0.009999999999999998
          kl: 0.0065059009212666925
          policy_loss: -0.07088634678059154
          total_loss: 0.79189100795322
          vf_explained_var: 0.5387452840805054
          vf_loss: 0.8703930324978298
    num_agent_steps_sampled: 1837000
    num_agent_steps_trained: 1837000
    num_steps_sampled: 1837000
    num_steps_trained: 1837000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1837,49451.2,1837000,-26.923,-19.7,-45.7,269.23


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1838000
  custom_metrics: {}
  date: 2021-10-29_10-51-17
  done: false
  episode_len_mean: 272.06
  episode_media: {}
  episode_reward_max: -19.70000000000001
  episode_reward_mean: -27.206000000000113
  episode_reward_min: -45.70000000000038
  episodes_this_iter: 4
  episodes_total: 6126
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5196265840650668
          cur_lr: 5.000000000000001e-05
          entropy: 1.1016468134191302
          entropy_coeff: 0.009999999999999998
          kl: 0.019054416276810572
          policy_loss: 0.048346209484669896
          total_loss: 0.8138509001996782
          vf_explained_var: 0.4865265190601349
          vf_loss: 0.7666199796729618
    num_agent_steps_sampled: 1838000
    num_agent_steps_trained: 1838000
    num_steps_sampled: 1838000
    num_steps_trained: 1838000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1838,49477.7,1838000,-27.206,-19.7,-45.7,272.06


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1839000
  custom_metrics: {}
  date: 2021-10-29_10-51-38
  done: false
  episode_len_mean: 276.92
  episode_media: {}
  episode_reward_max: -19.70000000000001
  episode_reward_mean: -27.69200000000012
  episode_reward_min: -47.70000000000041
  episodes_this_iter: 3
  episodes_total: 6129
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5196265840650668
          cur_lr: 5.000000000000001e-05
          entropy: 1.3440457198354934
          entropy_coeff: 0.009999999999999998
          kl: 0.0074991824870803865
          policy_loss: 0.04606514697273572
          total_loss: 0.8515104361706309
          vf_explained_var: 0.45297771692276
          vf_loss: 0.8149889699700806
    num_agent_steps_sampled: 1839000
    num_agent_steps_trained: 1839000
    num_steps_sampled: 1839000
    num_steps_trained: 1839000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1839,49498.6,1839000,-27.692,-19.7,-47.7,276.92


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1840000
  custom_metrics: {}
  date: 2021-10-29_10-52-02
  done: false
  episode_len_mean: 279.32
  episode_media: {}
  episode_reward_max: -19.70000000000001
  episode_reward_mean: -27.932000000000127
  episode_reward_min: -47.70000000000041
  episodes_this_iter: 3
  episodes_total: 6132
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5196265840650668
          cur_lr: 5.000000000000001e-05
          entropy: 1.208115926053789
          entropy_coeff: 0.009999999999999998
          kl: 0.012340654558211856
          policy_loss: -0.04198911620510949
          total_loss: 0.6309076438347498
          vf_explained_var: 0.6758999824523926
          vf_loss: 0.6785653855651617
    num_agent_steps_sampled: 1840000
    num_agent_steps_trained: 1840000
    num_steps_sampled: 1840000
    num_steps_trained: 1840000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1840,49522.1,1840000,-27.932,-19.7,-47.7,279.32


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1841000
  custom_metrics: {}
  date: 2021-10-29_10-52-23
  done: false
  episode_len_mean: 281.29
  episode_media: {}
  episode_reward_max: -19.70000000000001
  episode_reward_mean: -28.12900000000013
  episode_reward_min: -47.70000000000041
  episodes_this_iter: 2
  episodes_total: 6134
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5196265840650668
          cur_lr: 5.000000000000001e-05
          entropy: 1.2210435655381944
          entropy_coeff: 0.009999999999999998
          kl: 0.01934142998740798
          policy_loss: -0.09348490221632852
          total_loss: 0.8798223048448562
          vf_explained_var: 0.4253894090652466
          vf_loss: 0.9754673313763407
    num_agent_steps_sampled: 1841000
    num_agent_steps_trained: 1841000
    num_steps_sampled: 1841000
    num_steps_trained: 1841000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1841,49543.1,1841000,-28.129,-19.7,-47.7,281.29


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1842000
  custom_metrics: {}
  date: 2021-10-29_10-52-52
  done: false
  episode_len_mean: 283.48
  episode_media: {}
  episode_reward_max: -19.70000000000001
  episode_reward_mean: -28.348000000000134
  episode_reward_min: -47.70000000000041
  episodes_this_iter: 4
  episodes_total: 6138
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5196265840650668
          cur_lr: 5.000000000000001e-05
          entropy: 1.1983849313524035
          entropy_coeff: 0.009999999999999998
          kl: 0.007457006703613287
          policy_loss: 0.005417374190356997
          total_loss: 1.1324671473768022
          vf_explained_var: 0.19342918694019318
          vf_loss: 1.1351587580309974
    num_agent_steps_sampled: 1842000
    num_agent_steps_trained: 1842000
    num_steps_sampled: 1842000
    num_steps_trained: 1842000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1842,49572.4,1842000,-28.348,-19.7,-47.7,283.48


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1843000
  custom_metrics: {}
  date: 2021-10-29_10-53-17
  done: false
  episode_len_mean: 285.9
  episode_media: {}
  episode_reward_max: -19.70000000000001
  episode_reward_mean: -28.590000000000142
  episode_reward_min: -49.80000000000044
  episodes_this_iter: 3
  episodes_total: 6141
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5196265840650668
          cur_lr: 5.000000000000001e-05
          entropy: 1.1255894978841146
          entropy_coeff: 0.009999999999999998
          kl: 0.013716351508213797
          policy_loss: 0.0813733008172777
          total_loss: 0.5764752013815774
          vf_explained_var: 0.39915895462036133
          vf_loss: 0.49923041988060707
    num_agent_steps_sampled: 1843000
    num_agent_steps_trained: 1843000
    num_steps_sampled: 1843000
    num_steps_trained: 1843000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1843,49597.3,1843000,-28.59,-19.7,-49.8,285.9


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1844000
  custom_metrics: {}
  date: 2021-10-29_10-53-41
  done: false
  episode_len_mean: 287.21
  episode_media: {}
  episode_reward_max: -19.70000000000001
  episode_reward_mean: -28.721000000000146
  episode_reward_min: -49.80000000000044
  episodes_this_iter: 3
  episodes_total: 6144
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5196265840650668
          cur_lr: 5.000000000000001e-05
          entropy: 1.2374345620473226
          entropy_coeff: 0.009999999999999998
          kl: 0.011233318710510334
          policy_loss: -0.0014878472106324302
          total_loss: 0.9393678749601047
          vf_explained_var: -0.023828480392694473
          vf_loss: 0.9473929312907987
    num_agent_steps_sampled: 1844000
    num_agent_steps_trained: 1844000
    num_steps_sampled: 1844000
    num_steps_trained: 1844

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1844,49621.5,1844000,-28.721,-19.7,-49.8,287.21


Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1845000
  custom_metrics: {}
  date: 2021-10-29_10-54-02
  done: false
  episode_len_mean: 289.35
  episode_media: {}
  episode_reward_max: -19.70000000000001
  episode_reward_mean: -28.935000000000144
  episode_reward_min: -49.80000000000044
  episodes_this_iter: 3
  episodes_total: 6147
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5196265840650668
          cur_lr: 5.000000000000001e-05
          entropy: 1.3750950031810336
          entropy_coeff: 0.009999999999999998
          kl: 0.004638684191440332
          policy_loss: 0.07223363154464298
          total_loss: 0.7846143325169881
          vf_explained_var: 0.4913325905799866
          vf_loss: 0.7237212768859334
    num_agent_steps_sampled: 1845000
    num_agent_steps_trained: 1845000
    num_steps_sampled: 1845000
    num_steps_trained: 1845000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1845,49642.2,1845000,-28.935,-19.7,-49.8,289.35




Result for PPO_my_env_d4877_00000:
  agent_timesteps_total: 1846000
  custom_metrics: {}
  date: 2021-10-29_10-54-45
  done: false
  episode_len_mean: 289.71
  episode_media: {}
  episode_reward_max: -19.70000000000001
  episode_reward_mean: -28.971000000000146
  episode_reward_min: -49.80000000000044
  episodes_this_iter: 3
  episodes_total: 6150
  experiment_id: ab3163f264094146aa0559df77a991b6
  hostname: b53f0c0cd556
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2598132920325334
          cur_lr: 5.000000000000001e-05
          entropy: 1.3577826698621114
          entropy_coeff: 0.009999999999999998
          kl: 0.016187203700055896
          policy_loss: 0.0030262505014737448
          total_loss: 0.711601487464375
          vf_explained_var: 0.14819279313087463
          vf_loss: 0.7179474050800005
    num_agent_steps_sampled: 1846000
    num_agent_steps_trained: 1846000
    num_steps_sampled: 1846000
    num_steps_trained: 1846000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_d4877_00000,RUNNING,172.17.0.3:154,1846,49685,1846000,-28.971,-19.7,-49.8,289.71


Process _WandbLoggingProcess-1:
Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/integration/wandb.py", line 200, in run
    result = self.queue.get()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/queues.py", line 94, in get
    res = self._recv_bytes()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/root/miniconda/en

KeyboardInterrupt: 