In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder: six ``Conv2d`` + ``ELU`` stages followed by ``Flatten``.

    Every convolution uses a 2x2 kernel with stride 2 and no padding, so each
    stage halves the spatial size (rounding down). Channel widths go
    3 -> 32 -> 32 -> 64 -> 128 -> 256 -> 512.
    """

    # (in_channels, out_channels) of each convolution stage, in order.
    _STAGE_CHANNELS = ((3, 32), (32, 32), (32, 64), (64, 128), (128, 256), (256, 512))

    def __init__(self):
        super().__init__()

        stages = []
        for in_ch, out_ch in self._STAGE_CHANNELS:
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=2, stride=2, padding=0))
            stages.append(nn.ELU())
        # Layer order is conv, ELU, conv, ELU, ..., Flatten, so parameter names
        # stay ``cnn.0.*``, ``cnn.2.*``, ..., ``cnn.10.*``.
        self.cnn = nn.Sequential(*stages, nn.Flatten())

    def forward(self, x):
        """Run ``x`` through the convolution stack and return the flattened features."""
        features = self.cnn(x)
        return features

In [3]:
from torch.nn.functional import one_hot

class MyModelClass(TorchModelV2, nn.Module):
    """RLlib Torch model with a pretrained image encoder and a target-grid encoder.

    ``forward`` reads ``obs['pov']`` and ``obs['target_grid']`` from the input
    dict, encodes each, concatenates the two feature vectors and feeds them
    through a shared MLP. Two linear heads on top of the MLP produce the
    action logits (returned by ``forward``) and the state value (returned by
    ``value_function``).
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the encoders, the shared MLP and both heads.

        Pretrained weights for the visual encoder are loaded from a fixed
        path on disk (mapped to CPU first). All sub-modules are moved to CUDA
        when ``torch.cuda.is_available()`` is true.
        """
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        # Number of one-hot classes for ``target_grid``; it is also the input
        # channel count of the 3D convolution below, so the two must agree.
        self.num_target_classes = 7
        # Must equal the flattened output size of VisualEncoder for the pov input.
        visual_features_dim = 512
        # Flattened size of the single-channel target encoder output.
        target_features_dim = 9 * 11 * 11
        self.visual_encoder = VisualEncoder()
        self.visual_encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.target_encoder = nn.Sequential(
            nn.Conv3d(self.num_target_classes, 1, kernel_size=1, stride=1, padding=0),
            nn.ELU(),
        )
        policy_hidden_dim = 256
        self.policy_network = nn.Sequential(
            nn.Linear(visual_features_dim + target_features_dim, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU(),
            nn.Linear(policy_hidden_dim, policy_hidden_dim),
            nn.ELU(),
        )
        self.action_head = nn.Linear(policy_hidden_dim, action_space.n)
        self.value_head = nn.Linear(policy_hidden_dim, 1)
        # Value estimate of the most recent forward() call; see value_function().
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            for module in (
                self.visual_encoder,
                self.target_encoder,
                self.policy_network,
                self.action_head,
                self.value_head,
            ):
                module.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits for a batch and cache the value estimates.

        Args:
            input_dict: dict whose ``'obs'`` entry holds ``'pov'`` and
                ``'target_grid'`` tensors.
            state: recurrent state; passed through unchanged.
            seq_lens: unused.

        Returns:
            ``(action_logits, state)``.
        """
        obs = input_dict['obs']
        # Channels-last image -> channels-first, scaled by 1/255.
        pov = obs['pov'].permute(0, 3, 1, 2).float() / 255.0
        # One-hot encode the grid and move the class axis to the channel position.
        target = one_hot(obs['target_grid'].long(), num_classes=self.num_target_classes)
        target = target.permute(0, 4, 1, 2, 3).float()

        # ``Tensor.cuda()`` / ``Tensor.to()`` return a new tensor instead of
        # moving in place, so the result has to be assigned. Using the device
        # of the model parameters keeps inputs and weights together.
        device = next(self.parameters()).device
        pov = pov.to(device)
        target = target.to(device)

        # The visual encoder is evaluated without gradient tracking, so no
        # gradients reach its weights from this model's loss.
        with torch.no_grad():
            visual_features = self.visual_encoder(pov)

        target_features = self.target_encoder(target)
        target_features = target_features.reshape(target_features.shape[0], -1)
        features = torch.cat([visual_features, target_features], dim=1)
        features = self.policy_network(features)
        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimates cached by the last ``forward`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register MyModelClass with RLlib's ModelCatalog under the name "my_torch_model",
# which is the name the trainer config's "custom_model" entry refers to.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
class VisualObservationWrapper(ObsWrapper):
    """Observation wrapper exposing ``pov``, ``inventory``, ``compass`` and, optionally, ``target_grid``.

    Args:
        env: environment to wrap.
        include_target: when true, ``target_grid`` is added both to the
            declared observation space and to every returned observation.
    """

    def __init__(self, env, include_target=False):
        super().__init__(env)
        # Remembered so observation() returns exactly the keys declared below.
        self.include_target = include_target
        spaces = {
            'pov': gym.spaces.Box(low=0, high=255, shape=(64, 64, 3)),
            'inventory': gym.spaces.Box(low=0.0, high=20.0, shape=(6,)),
            'compass': gym.spaces.Box(low=-180.0, high=180.0, shape=(1,))
        }
        if include_target:
            spaces['target_grid'] = gym.spaces.Box(low=0, high=6, shape=(9, 11, 11))
        self.observation_space = gym.spaces.Dict(spaces)

    def observation(self, obs, reward=None, done=None, info=None):
        """Convert a raw observation into the dict described by ``observation_space``.

        The target grid is taken from ``info['target_grid']`` when present
        (and removed from ``info``). If ``info`` is given without that key,
        the condition is logged, a reset is requested on the unwrapped env
        when it supports ``should_reset``, and the grid of the current task
        is used instead. With ``info=None`` the current task's grid is used.

        Returns:
            dict with ``pov`` (float32), ``inventory``, ``compass`` (float32
            array of length 1) and, only when ``include_target`` was set,
            ``target_grid``.
        """
        if info is not None:
            if 'target_grid' in info:
                target_grid = info['target_grid']
                del info['target_grid']
            else:
                logger.error(f'info: {info}')
                if hasattr(self.unwrapped, 'should_reset'):
                    self.unwrapped.should_reset(True)
                target_grid = self.env.unwrapped.tasks.current.target_grid
        else:
            target_grid = self.env.unwrapped.tasks.current.target_grid

        observation = {
            'pov': obs['pov'].astype(np.float32),
            'inventory': obs['inventory'],
            # float32 matches the default dtype of the Box declared for 'compass'.
            'compass': np.array([obs['compass']['angle'].item()], dtype=np.float32),
        }
        # Only emit the key when it is part of the declared observation space.
        if self.include_target:
            observation['target_grid'] = target_grid
        return observation

In [6]:
import os
# Restrict CUDA to a single device: enumerate GPUs in PCI bus order and
# expose only device 0 to this process.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

class RewardWrapper(gym.RewardWrapper):
    """Reward wrapper that replaces a reward equal to zero with ``-0.01``."""

    def __init__(self, env):
        super().__init__(env)

    def reward(self, rew):
        """Return ``-0.01`` when ``rew == 0``; otherwise return ``rew`` unchanged."""
        return -0.01 if rew == 0 else rew
    
def env_creator(env_config):
    """Build the wrapped IGLU environment used for training.

    Args:
        env_config: optional mapping of settings. Recognised keys:
            ``'max_steps'`` (default ``1000``) is forwarded to ``gym.make``;
            ``'tasks'`` (default: the nine-task preset below) is the task
            preset passed to ``TaskSet``. Missing keys, an empty mapping or
            ``None`` all fall back to the defaults.

    Returns:
        The environment wrapped, innermost first, by VisualObservationWrapper
        (with the target grid included), SelectAndPlace, Discretization over
        the 'human-level' flat action space, and RewardWrapper.
    """
    env_config = env_config or {}
    max_steps = env_config.get('max_steps', 1000)
    tasks = env_config.get(
        'tasks',
        ['C3', 'C17', 'C20', 'C22', 'C32', 'C40', 'C85', 'C87', 'C93'],
    )

    env = gym.make('IGLUSilentBuilder-v0', max_steps=max_steps)
    env.update_taskset(TaskSet(preset=list(tasks)))
    env = VisualObservationWrapper(env, include_target=True)
    env = SelectAndPlace(env)
    env = Discretization(env, flat_action_space('human-level'))
    env = RewardWrapper(env)
    return env

from ray.tune.registry import register_env
# Make env_creator available to RLlib under the name "my_env" (used as "env" in the trainer config).
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [None]:
from ray.tune.integration.wandb import WandbLogger

# Model section of the trainer config.
model_settings = {
    # Specify our custom model from above.
    "custom_model": "my_torch_model",
    # Extra kwargs to be passed to your model's c'tor.
    "custom_model_config": {},
}

# Weights & Biases project and run name used by WandbLogger.
wandb_settings = {
    "project": "IGLU-Minecraft",
    "name": "PPO MultiTask (C3, C17) pretrained (AngelaCNN) (3 noops after placement) r: -0.01",
}

# PPO trainer configuration; "gamma" is not set here, so the trainer default applies.
ppo_config = {
    "env": "my_env",
    "framework": "torch",
    "num_gpus": 1,
    "num_workers": 1,
    "sgd_minibatch_size": 256,
    "clip_param": 0.2,
    "entropy_coeff": 0.01,
    "lambda": 0.95,
    "train_batch_size": 1000,
    "model": model_settings,
    "logger_config": {"wandb": wandb_settings},
}

# Launch training; no stop criterion is passed to tune.run here.
analysis = tune.run(PPOTrainer, config=ppo_config, loggers=[WandbLogger])

2021-10-24 08:07:35,318	INFO wandb.py:170 -- Already logged into W&B.
2021-10-24 08:07:35,332	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_my_env_722d3_00000,RUNNING,


[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.5 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=43204)[0m 2021-10-24 08:07:38,823	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=43204)[0m 2021-10-24 08:07:38,824	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-24_08-08-42
  done: false
  episode_len_mean: 411.0
  episode_media: {}
  episode_reward_max: -4.069999999999958
  episode_reward_mean: -5.549999999999958
  episode_reward_min: -7.0299999999999585
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8744514915678234
          entropy_coeff: 0.009999999999999998
          kl: 0.011118928727669659
          policy_loss: 0.010810027685430315
          total_loss: 0.1933779288911157
          vf_explained_var: -0.34938526153564453
          vf_loss: 0.2090886280949538
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,1,57.9463,1000,-5.55,-4.07,-7.03,411


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-24_08-09-03
  done: false
  episode_len_mean: 413.0
  episode_media: {}
  episode_reward_max: -4.069999999999958
  episode_reward_mean: -4.849999999999957
  episode_reward_min: -7.0299999999999585
  episodes_this_iter: 2
  episodes_total: 4
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.861661916308933
          entropy_coeff: 0.009999999999999998
          kl: 0.010620355901958254
          policy_loss: -0.06807942920260959
          total_loss: -0.07977767321798536
          vf_explained_var: -0.28024330735206604
          vf_loss: 0.014794305949989293
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,2,78.8545,2000,-4.85,-4.07,-7.03,413


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-24_08-09-24
  done: false
  episode_len_mean: 410.85714285714283
  episode_media: {}
  episode_reward_max: -4.0299999999999585
  episode_reward_mean: -4.519999999999958
  episode_reward_min: -7.0299999999999585
  episodes_this_iter: 3
  episodes_total: 7
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8465657393137613
          entropy_coeff: 0.009999999999999998
          kl: 0.0083489802030968
          policy_loss: -0.08325904938909742
          total_loss: -0.10181963907347785
          vf_explained_var: -0.20599365234375
          vf_loss: 0.00823527249869787
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,3,99.6419,3000,-4.52,-4.03,-7.03,410.857


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-24_08-09-44
  done: false
  episode_len_mean: 411.1111111111111
  episode_media: {}
  episode_reward_max: -3.9599999999999596
  episode_reward_mean: -5.076666666666624
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 9
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.859794529279073
          entropy_coeff: 0.009999999999999998
          kl: 0.013833740618012768
          policy_loss: 0.0337354924943712
          total_loss: 0.11869491048985058
          vf_explained_var: 0.4513717293739319
          vf_loss: 0.11079061517698897
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,4,119.214,4000,-5.07667,-3.96,-10.09,411.111


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-24_08-10-05
  done: false
  episode_len_mean: 412.25
  episode_media: {}
  episode_reward_max: -3.9599999999999596
  episode_reward_mean: -4.846666666666624
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 12
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.837435743543837
          entropy_coeff: 0.009999999999999998
          kl: 0.008711702867448039
          policy_loss: 0.0065767794847488405
          total_loss: -0.0132725457350413
          vf_explained_var: -0.11718424409627914
          vf_loss: 0.006782690314058628
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,5,140.142,5000,-4.84667,-3.96,-10.09,412.25


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-24_08-10-24
  done: false
  episode_len_mean: 416.07142857142856
  episode_media: {}
  episode_reward_max: -3.9599999999999596
  episode_reward_mean: -4.781428571428528
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 14
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.823223026593526
          entropy_coeff: 0.009999999999999998
          kl: 0.009976829696551092
          policy_loss: 0.049318941434224446
          total_loss: 0.028417245464192498
          vf_explained_var: 0.26819100975990295
          vf_loss: 0.00533516977868405
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,6,159.699,6000,-4.78143,-3.96,-10.09,416.071


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-24_08-10-44
  done: false
  episode_len_mean: 419.25
  episode_media: {}
  episode_reward_max: -3.9599999999999596
  episode_reward_mean: -4.735624999999956
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 16
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8353933016459147
          entropy_coeff: 0.009999999999999998
          kl: 0.010141403166554635
          policy_loss: -0.0946184415784147
          total_loss: -0.11425459103451835
          vf_explained_var: 0.24659539759159088
          vf_loss: 0.006689502454052369
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,7,178.923,7000,-4.73562,-3.96,-10.09,419.25


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-24_08-11-03
  done: false
  episode_len_mean: 420.05263157894734
  episode_media: {}
  episode_reward_max: -3.9599999999999596
  episode_reward_mean: -4.657894736842061
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 19
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.839690974023607
          entropy_coeff: 0.009999999999999998
          kl: 0.00967025522509629
          policy_loss: -0.021223402188883887
          total_loss: -0.03841776235236062
          vf_explained_var: -0.052891794592142105
          vf_loss: 0.009268496644734923
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,8,198.664,8000,-4.65789,-3.96,-10.09,420.053


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-24_08-11-22
  done: false
  episode_len_mean: 422.04761904761904
  episode_media: {}
  episode_reward_max: -3.9599999999999596
  episode_reward_mean: -4.634285714285669
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 21
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8076964537302653
          entropy_coeff: 0.009999999999999998
          kl: 0.011854977983411865
          policy_loss: 0.07734611423479186
          total_loss: 0.0551722440454695
          vf_explained_var: 0.17189611494541168
          vf_loss: 0.003532092437510275
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,9,217.309,9000,-4.63429,-3.96,-10.09,422.048


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-24_08-11-39
  done: false
  episode_len_mean: 424.2608695652174
  episode_media: {}
  episode_reward_max: -3.9599999999999596
  episode_reward_mean: -4.62043478260865
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 23
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.7861166662640042
          entropy_coeff: 0.009999999999999998
          kl: 0.011098304905285733
          policy_loss: -0.039184086190329655
          total_loss: -0.058166556888156466
          vf_explained_var: 0.5752285718917847
          vf_loss: 0.006659034571688001
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,10,234.493,10000,-4.62043,-3.96,-10.09,424.261


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-24_08-11-56
  done: false
  episode_len_mean: 426.68
  episode_media: {}
  episode_reward_max: -3.9599999999999596
  episode_reward_mean: -4.614399999999954
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 25
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.755362383524577
          entropy_coeff: 0.009999999999999998
          kl: 0.009791346183630978
          policy_loss: -0.03095960517724355
          total_loss: -0.04901136772500144
          vf_explained_var: 0.634289026260376
          vf_loss: 0.007543595983750291
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 11000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,11,251.438,11000,-4.6144,-3.96,-10.09,426.68


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-24_08-12-13
  done: false
  episode_len_mean: 427.7857142857143
  episode_media: {}
  episode_reward_max: -3.9599999999999596
  episode_reward_mean: -4.588214285714239
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 28
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.736412432458666
          entropy_coeff: 0.009999999999999998
          kl: 0.01204689264868616
          policy_loss: -0.008875059253639645
          total_loss: -0.02515063981215159
          vf_explained_var: 0.27871230244636536
          vf_loss: 0.00867916217733485
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 120

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,12,268.45,12000,-4.58821,-3.96,-10.09,427.786




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-24_08-12-47
  done: false
  episode_len_mean: 428.96666666666664
  episode_media: {}
  episode_reward_max: -3.9599999999999596
  episode_reward_mean: -4.579333333333286
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 30
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.703014678425259
          entropy_coeff: 0.009999999999999998
          kl: 0.010390502215191728
          policy_loss: 0.11042388992177116
          total_loss: 0.090174510412746
          vf_explained_var: 0.6712636947631836
          vf_loss: 0.004702663324941467
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,13,301.973,13000,-4.57933,-3.96,-10.09,428.967


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-24_08-13-06
  done: false
  episode_len_mean: 429.09375
  episode_media: {}
  episode_reward_max: -3.9599999999999596
  episode_reward_mean: -4.562499999999954
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 32
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.732137902577718
          entropy_coeff: 0.009999999999999998
          kl: 0.011573955213368277
          policy_loss: -0.08442385378811094
          total_loss: -0.1006220183438725
          vf_explained_var: 0.38129377365112305
          vf_loss: 0.008808421273069042
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,14,321.396,14000,-4.5625,-3.96,-10.09,429.094


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-24_08-13-24
  done: false
  episode_len_mean: 427.51428571428573
  episode_media: {}
  episode_reward_max: -3.9599999999999596
  episode_reward_mean: -4.523428571428526
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 35
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6919498576058283
          entropy_coeff: 0.009999999999999998
          kl: 0.008920100553558524
          policy_loss: 0.024468512501981524
          total_loss: 0.008813465055492188
          vf_explained_var: 0.3119675815105438
          vf_loss: 0.009480430388551515
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,15,339.64,15000,-4.52343,-3.96,-10.09,427.514


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-24_08-13-41
  done: false
  episode_len_mean: 426.6216216216216
  episode_media: {}
  episode_reward_max: -3.9599999999999596
  episode_reward_mean: -4.501081081081035
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 37
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.683370831277635
          entropy_coeff: 0.009999999999999998
          kl: 0.010233115300331995
          policy_loss: 0.05383368730545044
          total_loss: 0.03492405100001229
          vf_explained_var: 0.2828112840652466
          vf_loss: 0.005877447223691787
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,16,356.538,16000,-4.50108,-3.96,-10.09,426.622


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-24_08-13-58
  done: false
  episode_len_mean: 426.7435897435897
  episode_media: {}
  episode_reward_max: -3.9599999999999596
  episode_reward_mean: -4.4902564102563645
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 39
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6746112532085844
          entropy_coeff: 0.009999999999999998
          kl: 0.013353589200841636
          policy_loss: -0.09082194599840376
          total_loss: -0.10448178781403436
          vf_explained_var: 0.2782447040081024
          vf_loss: 0.010415550476884366
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,17,373.647,17000,-4.49026,-3.96,-10.09,426.744


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-24_08-14-17
  done: false
  episode_len_mean: 425.2857142857143
  episode_media: {}
  episode_reward_max: -3.839999999999962
  episode_reward_mean: -4.459761904761859
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 42
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.679297873708937
          entropy_coeff: 0.009999999999999998
          kl: 0.012186090876523458
          policy_loss: 0.04440627329879337
          total_loss: 0.031174172378248637
          vf_explained_var: 0.10144650936126709
          vf_loss: 0.011123659937745995
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 1800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,18,391.801,18000,-4.45976,-3.84,-10.09,425.286


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-24_08-14-33
  done: false
  episode_len_mean: 425.5681818181818
  episode_media: {}
  episode_reward_max: -3.839999999999962
  episode_reward_mean: -4.453181818181772
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 44
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.6550605376561482
          entropy_coeff: 0.009999999999999998
          kl: 0.012156281070587452
          policy_loss: -0.11297136495510737
          total_loss: -0.12583498888545566
          vf_explained_var: 0.37492379546165466
          vf_loss: 0.011255726176831458
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,19,408.116,19000,-4.45318,-3.84,-10.09,425.568


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-24_08-14-50
  done: false
  episode_len_mean: 426.39130434782606
  episode_media: {}
  episode_reward_max: -3.839999999999962
  episode_reward_mean: -4.452826086956475
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 46
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.671281335088942
          entropy_coeff: 0.009999999999999998
          kl: 0.009857625127480714
          policy_loss: -0.08990887453158697
          total_loss: -0.10416888263490465
          vf_explained_var: 0.28837883472442627
          vf_loss: 0.010481280243160048
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,20,425.08,20000,-4.45283,-3.84,-10.09,426.391


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-24_08-15-07
  done: false
  episode_len_mean: 426.3061224489796
  episode_media: {}
  episode_reward_max: -3.839999999999962
  episode_reward_mean: -4.44040816326526
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 49
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.623565181096395
          entropy_coeff: 0.009999999999999998
          kl: 0.011883695335593922
          policy_loss: 0.05453990739252832
          total_loss: 0.04195162389013502
          vf_explained_var: 0.2568868398666382
          vf_loss: 0.011270633825592489
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 21000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,21,441.901,21000,-4.44041,-3.84,-10.09,426.306


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-24_08-15-22
  done: false
  episode_len_mean: 427.72549019607845
  episode_media: {}
  episode_reward_max: -3.839999999999962
  episode_reward_mean: -4.447647058823483
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 51
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.563354664378696
          entropy_coeff: 0.009999999999999998
          kl: 0.011002444768424062
          policy_loss: 0.06023230022854275
          total_loss: 0.043240092529190914
          vf_explained_var: 0.2143336981534958
          vf_loss: 0.006440854060201673
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 2200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,22,457.398,22000,-4.44765,-3.84,-10.09,427.725


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-24_08-15-38
  done: false
  episode_len_mean: 429.0943396226415
  episode_media: {}
  episode_reward_max: -3.839999999999962
  episode_reward_mean: -4.454905660377312
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 53
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5675582355923123
          entropy_coeff: 0.009999999999999998
          kl: 0.011907603282679228
          policy_loss: -0.07467967006895278
          total_loss: -0.08574499355422126
          vf_explained_var: -0.13716085255146027
          vf_loss: 0.012228738061579255
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,23,473.325,23000,-4.45491,-3.84,-10.09,429.094


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-24_08-15-53
  done: false
  episode_len_mean: 430.6363636363636
  episode_media: {}
  episode_reward_max: -3.839999999999962
  episode_reward_mean: -4.46436363636359
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 55
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.520619861284892
          entropy_coeff: 0.009999999999999998
          kl: 0.009983726878339599
          policy_loss: -0.0759664461016655
          total_loss: -0.08699069635735618
          vf_explained_var: 0.343707412481308
          vf_loss: 0.012185199071730797
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,24,488.481,24000,-4.46436,-3.84,-10.09,430.636


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-24_08-16-09
  done: false
  episode_len_mean: 432.2105263157895
  episode_media: {}
  episode_reward_max: -3.839999999999962
  episode_reward_mean: -4.474561403508725
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 57
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5381327205234103
          entropy_coeff: 0.009999999999999998
          kl: 0.01114251347515969
          policy_loss: -0.10203038983874851
          total_loss: -0.11309113684627745
          vf_explained_var: 0.42134422063827515
          vf_loss: 0.012092076353858122
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 250

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,25,504.48,25000,-4.47456,-3.84,-10.09,432.211




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-24_08-16-42
  done: false
  episode_len_mean: 433.1
  episode_media: {}
  episode_reward_max: -3.839999999999962
  episode_reward_mean: -4.475833333333285
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 60
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.530299435721503
          entropy_coeff: 0.009999999999999998
          kl: 0.009969468588732709
          policy_loss: 0.017101978758970897
          total_loss: 0.005712219741609362
          vf_explained_var: 0.5163366198539734
          vf_loss: 0.011919343473790731
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,26,537.185,26000,-4.47583,-3.84,-10.09,433.1


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-24_08-17-01
  done: false
  episode_len_mean: 434.43548387096774
  episode_media: {}
  episode_reward_max: -3.839999999999962
  episode_reward_mean: -4.484516129032209
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 62
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5289483308792113
          entropy_coeff: 0.009999999999999998
          kl: 0.010785509053018514
          policy_loss: 0.06586531839436954
          total_loss: 0.048698706014288794
          vf_explained_var: 0.6415259838104248
          vf_loss: 0.005965768612804822
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 270

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,27,555.611,27000,-4.48452,-3.84,-10.09,434.435


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-24_08-17-16
  done: false
  episode_len_mean: 435.734375
  episode_media: {}
  episode_reward_max: -3.839999999999962
  episode_reward_mean: -4.493124999999951
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 64
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4877820518281726
          entropy_coeff: 0.009999999999999998
          kl: 0.009382942761941375
          policy_loss: 0.13314620819356707
          total_loss: 0.11571755127774344
          vf_explained_var: 0.3060520887374878
          vf_loss: 0.005572573777640678
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,28,571.099,28000,-4.49312,-3.84,-10.09,435.734


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-24_08-17-33
  done: false
  episode_len_mean: 435.7121212121212
  episode_media: {}
  episode_reward_max: -3.839999999999962
  episode_reward_mean: -4.4887878787878295
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 66
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5270359410179988
          entropy_coeff: 0.009999999999999998
          kl: 0.012996312800641065
          policy_loss: -0.06688814494344923
          total_loss: -0.07676625400781631
          vf_explained_var: 0.22202962636947632
          vf_loss: 0.012792980916856322
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,29,588.044,29000,-4.48879,-3.84,-10.09,435.712


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-24_08-17-51
  done: false
  episode_len_mean: 433.8985507246377
  episode_media: {}
  episode_reward_max: -3.799999999999963
  episode_reward_mean: -4.464927536231835
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 69
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.5135796944300335
          entropy_coeff: 0.009999999999999998
          kl: 0.011944662645418753
          policy_loss: 0.02727724379963345
          total_loss: 0.016026020215617286
          vf_explained_var: 0.21930065751075745
          vf_loss: 0.011495639601101478
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,30,605.947,30000,-4.46493,-3.8,-10.09,433.899


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-24_08-18-09
  done: false
  episode_len_mean: 433.0422535211268
  episode_media: {}
  episode_reward_max: -3.799999999999963
  episode_reward_mean: -4.452816901408402
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 71
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.437154931492276
          entropy_coeff: 0.009999999999999998
          kl: 0.016295060089372378
          policy_loss: -0.09422760274675157
          total_loss: -0.10228910313712226
          vf_explained_var: 0.17173489928245544
          vf_loss: 0.01305103307758044
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 3100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,31,623.987,31000,-4.45282,-3.8,-10.09,433.042


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-24_08-18-29
  done: false
  episode_len_mean: 431.43243243243245
  episode_media: {}
  episode_reward_max: -3.799999999999963
  episode_reward_mean: -4.431756756756708
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 74
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.46384694841173
          entropy_coeff: 0.009999999999999998
          kl: 0.010209836227356502
          policy_loss: 0.04388834916883045
          total_loss: 0.03522456793321504
          vf_explained_var: 0.31172850728034973
          vf_loss: 0.013932719443659557
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,32,643.523,32000,-4.43176,-3.8,-10.09,431.432


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-24_08-18-49
  done: false
  episode_len_mean: 429.69736842105266
  episode_media: {}
  episode_reward_max: -3.5299999999999687
  episode_reward_mean: -4.411315789473636
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 76
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4499763912624783
          entropy_coeff: 0.009999999999999998
          kl: 0.01043101857484924
          policy_loss: -0.10733234435319901
          total_loss: -0.11643342855903838
          vf_explained_var: 0.1555488258600235
          vf_loss: 0.013312480237800628
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,33,663.446,33000,-4.41132,-3.53,-10.09,429.697


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-24_08-19-08
  done: false
  episode_len_mean: 427.3544303797468
  episode_media: {}
  episode_reward_max: -3.5299999999999687
  episode_reward_mean: -4.383544303797421
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 79
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.417565001381768
          entropy_coeff: 0.009999999999999998
          kl: 0.013471375473407063
          policy_loss: -0.0849065302974648
          total_loss: -0.09017350872357686
          vf_explained_var: 0.1353253573179245
          vf_loss: 0.01621439487983783
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,34,683.004,34000,-4.38354,-3.53,-10.09,427.354


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-24_08-19-26
  done: false
  episode_len_mean: 425.5487804878049
  episode_media: {}
  episode_reward_max: -3.5299999999999687
  episode_reward_mean: -4.3614634146341
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 82
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.4415612432691787
          entropy_coeff: 0.009999999999999998
          kl: 0.012099474625682453
          policy_loss: 0.04749069834748904
          total_loss: 0.038251183513138026
          vf_explained_var: 0.30535924434661865
          vf_loss: 0.012756204310183724
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 3500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,35,701.186,35000,-4.36146,-3.53,-10.09,425.549


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-24_08-19-45
  done: false
  episode_len_mean: 424.0833333333333
  episode_media: {}
  episode_reward_max: -3.5299999999999687
  episode_reward_mean: -4.344285714285668
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 2
  episodes_total: 84
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.402297102080451
          entropy_coeff: 0.009999999999999998
          kl: 0.0122483763742256
          policy_loss: -0.10054667327139112
          total_loss: -0.11010134634044436
          vf_explained_var: 0.2945574223995209
          vf_loss: 0.012018624146650028
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,36,720.021,36000,-4.34429,-3.53,-10.09,424.083


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-24_08-20-05
  done: false
  episode_len_mean: 421.9655172413793
  episode_media: {}
  episode_reward_max: -3.509999999999969
  episode_reward_mean: -4.319540229885012
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 87
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.3138816462622747
          entropy_coeff: 0.009999999999999998
          kl: 0.014836582666710384
          policy_loss: -0.10346178172363175
          total_loss: -0.10832933361331622
          vf_explained_var: 0.20402416586875916
          vf_loss: 0.015303946472704411
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,37,739.748,37000,-4.31954,-3.51,-10.09,421.966




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-24_08-20-41
  done: false
  episode_len_mean: 419.3666666666667
  episode_media: {}
  episode_reward_max: -3.2599999999999745
  episode_reward_mean: -4.2902222222221775
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 90
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.255222135119968
          entropy_coeff: 0.009999999999999998
          kl: 0.009693279864070566
          policy_loss: -0.10384100824594497
          total_loss: -0.10881879130999247
          vf_explained_var: 0.19107858836650848
          vf_loss: 0.015635784746458135
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,38,775.533,38000,-4.29022,-3.26,-10.09,419.367


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-24_08-21-03
  done: false
  episode_len_mean: 417.1505376344086
  episode_media: {}
  episode_reward_max: -3.2599999999999745
  episode_reward_mean: -4.264946236559096
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 93
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.306727584203084
          entropy_coeff: 0.009999999999999998
          kl: 0.01072664265548787
          policy_loss: 0.013165178563859728
          total_loss: 0.0034286708467536503
          vf_explained_var: 0.288860559463501
          vf_loss: 0.01118543958873488
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 39000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,39,797.95,39000,-4.26495,-3.26,-10.09,417.151


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-24_08-21-25
  done: false
  episode_len_mean: 414.2604166666667
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -4.233124999999956
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 96
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.282320949766371
          entropy_coeff: 0.009999999999999998
          kl: 0.006877908231229904
          policy_loss: -0.08852307010028097
          total_loss: -0.09493836934367815
          vf_explained_var: 0.1551164835691452
          vf_loss: 0.015032326978527837
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 4000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,40,819.513,40000,-4.23312,-3.23,-10.09,414.26


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-24_08-21-46
  done: false
  episode_len_mean: 411.3939393939394
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -4.201717171717128
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 99
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.225383069780138
          entropy_coeff: 0.009999999999999998
          kl: 0.008831454083722868
          policy_loss: -0.10611400827765465
          total_loss: -0.11243879480494393
          vf_explained_var: 0.2681999206542969
          vf_loss: 0.014162752259936597
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 4100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,41,841.205,41000,-4.20172,-3.14,-10.09,411.394


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-24_08-22-08
  done: false
  episode_len_mean: 408.84
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -4.146499999999956
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 102
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1703269481658936
          entropy_coeff: 0.009999999999999998
          kl: 0.00704059778421888
          policy_loss: -0.12398050791687436
          total_loss: -0.12973860369788276
          vf_explained_var: 0.25312182307243347
          vf_loss: 0.014537052220354478
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,42,862.518,42000,-4.1465,-3.14,-10.09,408.84


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-24_08-22-29
  done: false
  episode_len_mean: 406.21
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -4.120199999999957
  episode_reward_min: -10.089999999999957
  episodes_this_iter: 3
  episodes_total: 105
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.139013658629523
          entropy_coeff: 0.009999999999999998
          kl: 0.011516283211298155
          policy_loss: -0.12916147278414833
          total_loss: -0.13436100482940674
          vf_explained_var: 0.2880769371986389
          vf_loss: 0.01388734994042251
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,43,883.357,43000,-4.1202,-3.14,-10.09,406.21


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-24_08-22-50
  done: false
  episode_len_mean: 403.84
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -4.038399999999958
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 3
  episodes_total: 108
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.167924279636807
          entropy_coeff: 0.009999999999999998
          kl: 0.012825093253100937
          policy_loss: -0.13624530169698928
          total_loss: -0.14168901104066106
          vf_explained_var: 0.32442471385002136
          vf_loss: 0.013670514399806658
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,44,904.265,44000,-4.0384,-3.14,-4.97,403.84


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-24_08-23-11
  done: false
  episode_len_mean: 400.4
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -4.003999999999959
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 4
  episodes_total: 112
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.1177324453989663
          entropy_coeff: 0.009999999999999998
          kl: 0.011636431947971617
          policy_loss: -0.019991370870007408
          total_loss: -0.02630259816845258
          vf_explained_var: 0.38280394673347473
          vf_loss: 0.012538810219201777
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,45,925.485,45000,-4.004,-3.14,-4.97,400.4


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-24_08-23-32
  done: false
  episode_len_mean: 397.1
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -3.9709999999999592
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 3
  episodes_total: 115
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.0983935409122045
          entropy_coeff: 0.009999999999999998
          kl: 0.010292460598659956
          policy_loss: -0.008295932743284438
          total_loss: -0.01687710169288847
          vf_explained_var: 0.5496423244476318
          vf_loss: 0.010344273077660344
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,46,946.591,46000,-3.971,-3.14,-4.97,397.1


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-24_08-23-52
  done: false
  episode_len_mean: 395.5
  episode_media: {}
  episode_reward_max: -3.139999999999977
  episode_reward_mean: -3.9549999999999597
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 2
  episodes_total: 117
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.089154595798916
          entropy_coeff: 0.009999999999999998
          kl: 0.00922194843908607
          policy_loss: -0.10494527121384939
          total_loss: -0.11272411031855477
          vf_explained_var: 0.43306809663772583
          vf_loss: 0.011268320754364443
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,47,967.001,47000,-3.955,-3.14,-4.97,395.5




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-24_08-24-31
  done: false
  episode_len_mean: 392.4
  episode_media: {}
  episode_reward_max: -3.0599999999999787
  episode_reward_mean: -3.9239999999999604
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 3
  episodes_total: 120
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.022998176680671
          entropy_coeff: 0.009999999999999998
          kl: 0.012240771058387483
          policy_loss: -0.10362455927663379
          total_loss: -0.1085675737924046
          vf_explained_var: 0.2935118079185486
          vf_loss: 0.012838814324802822
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,48,1005.34,48000,-3.924,-3.06,-4.97,392.4


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-24_08-24-49
  done: false
  episode_len_mean: 390.2
  episode_media: {}
  episode_reward_max: -3.0599999999999787
  episode_reward_mean: -3.901999999999962
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 3
  episodes_total: 123
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9688320305612352
          entropy_coeff: 0.009999999999999998
          kl: 0.006836758869380017
          policy_loss: -0.013539054741462072
          total_loss: -0.02007720892628034
          vf_explained_var: 0.0753578469157219
          vf_loss: 0.011782817495986819
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,49,1023.99,49000,-3.902,-3.06,-4.97,390.2


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-24_08-25-09
  done: false
  episode_len_mean: 387.59
  episode_media: {}
  episode_reward_max: -3.0599999999999787
  episode_reward_mean: -3.8758999999999624
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 3
  episodes_total: 126
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.9329132053587172
          entropy_coeff: 0.009999999999999998
          kl: 0.00702432304609412
          policy_loss: 0.045699185132980345
          total_loss: 0.03793884184625414
          vf_explained_var: 0.3555440306663513
          vf_loss: 0.010163927084714587
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,50,1043.05,50000,-3.8759,-3.06,-4.97,387.59


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-24_08-25-28
  done: false
  episode_len_mean: 385.11
  episode_media: {}
  episode_reward_max: -3.0599999999999787
  episode_reward_mean: -3.851099999999962
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 3
  episodes_total: 129
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.866403913497925
          entropy_coeff: 0.009999999999999998
          kl: 0.008687259510645143
          policy_loss: 0.07155091646644804
          total_loss: 0.06368989513980018
          vf_explained_var: 0.37934768199920654
          vf_loss: 0.009065564325687269
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,51,1062.2,51000,-3.8511,-3.06,-4.97,385.11


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-24_08-25-48
  done: false
  episode_len_mean: 383.82
  episode_media: {}
  episode_reward_max: -3.0599999999999787
  episode_reward_mean: -3.8381999999999623
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 2
  episodes_total: 131
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.865984825293223
          entropy_coeff: 0.009999999999999998
          kl: 0.005055859477642461
          policy_loss: -0.09855056239498985
          total_loss: -0.1037836684121026
          vf_explained_var: 0.461625874042511
          vf_loss: 0.01241556713745619
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,52,1082.42,52000,-3.8382,-3.06,-4.97,383.82


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-24_08-26-08
  done: false
  episode_len_mean: 381.84
  episode_media: {}
  episode_reward_max: -3.0599999999999787
  episode_reward_mean: -3.8183999999999627
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 3
  episodes_total: 134
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7607927097214593
          entropy_coeff: 0.009999999999999998
          kl: 0.008125986304557959
          policy_loss: -0.09755575218134456
          total_loss: -0.0989173481033908
          vf_explained_var: 0.28694474697113037
          vf_loss: 0.01462113375051154
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,53,1102.46,53000,-3.8184,-3.06,-4.97,381.84


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-24_08-26-28
  done: false
  episode_len_mean: 380.21
  episode_media: {}
  episode_reward_max: -3.0599999999999787
  episode_reward_mean: -3.8020999999999634
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 3
  episodes_total: 137
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 1.7439117458131579
          entropy_coeff: 0.009999999999999998
          kl: 0.003591101851237887
          policy_loss: -0.02893312391307619
          total_loss: -0.03543046365181605
          vf_explained_var: 0.43719762563705444
          vf_loss: 0.010223552592813373
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,54,1122,54000,-3.8021,-3.06,-4.97,380.21


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-24_08-26-47
  done: false
  episode_len_mean: 378.24
  episode_media: {}
  episode_reward_max: -3.0599999999999787
  episode_reward_mean: -3.782399999999963
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 3
  episodes_total: 140
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7415945026609632
          entropy_coeff: 0.009999999999999998
          kl: 0.010558143968108051
          policy_loss: 0.05305341995424694
          total_loss: 0.0492022847963704
          vf_explained_var: 0.22362232208251953
          vf_loss: 0.012508996498460571
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,55,1141.18,55000,-3.7824,-3.06,-4.97,378.24


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-10-24_08-27-07
  done: false
  episode_len_mean: 376.02
  episode_media: {}
  episode_reward_max: -3.0599999999999787
  episode_reward_mean: -3.760199999999963
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 3
  episodes_total: 143
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7415026757452223
          entropy_coeff: 0.009999999999999998
          kl: 0.01237097055622469
          policy_loss: 0.03785173743963242
          total_loss: 0.03593963210781415
          vf_explained_var: -0.08670564740896225
          vf_loss: 0.014265824760271547
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,56,1161.26,56000,-3.7602,-3.06,-4.97,376.02


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-10-24_08-27-26
  done: false
  episode_len_mean: 373.42
  episode_media: {}
  episode_reward_max: -3.0599999999999787
  episode_reward_mean: -3.7341999999999644
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 3
  episodes_total: 146
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7756770332654317
          entropy_coeff: 0.009999999999999998
          kl: 0.010361319000686306
          policy_loss: 0.06176967720190684
          total_loss: 0.05597426725758447
          vf_explained_var: 0.42495986819267273
          vf_loss: 0.010925228949539208
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,57,1180.13,57000,-3.7342,-3.06,-4.97,373.42


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-10-24_08-27-45
  done: false
  episode_len_mean: 372.31
  episode_media: {}
  episode_reward_max: -3.0599999999999787
  episode_reward_mean: -3.723099999999964
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 2
  episodes_total: 148
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.811038006676568
          entropy_coeff: 0.009999999999999998
          kl: 0.013406364485792781
          policy_loss: -0.09335294134087033
          total_loss: -0.09907068444622888
          vf_explained_var: 0.5388706922531128
          vf_loss: 0.01105199902748508
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,58,1199.01,58000,-3.7231,-3.06,-4.97,372.31




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-10-24_08-28-22
  done: false
  episode_len_mean: 368.42
  episode_media: {}
  episode_reward_max: -2.7299999999999858
  episode_reward_mean: -3.6841999999999655
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 3
  episodes_total: 151
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8214438411924574
          entropy_coeff: 0.009999999999999998
          kl: 0.014956307331141576
          policy_loss: -0.135616528077258
          total_loss: -0.13986210889286466
          vf_explained_var: 0.49320492148399353
          vf_loss: 0.012473226017836068
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,59,1236.33,59000,-3.6842,-2.73,-4.97,368.42


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-10-24_08-28-43
  done: false
  episode_len_mean: 362.45
  episode_media: {}
  episode_reward_max: -2.7299999999999858
  episode_reward_mean: -3.6244999999999665
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 4
  episodes_total: 155
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.894412222173479
          entropy_coeff: 0.009999999999999998
          kl: 0.01018708583713198
          policy_loss: 0.024392758806546528
          total_loss: 0.016251179037822618
          vf_explained_var: 0.5709729790687561
          vf_loss: 0.009783833794709708
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,60,1257.82,60000,-3.6245,-2.73,-4.97,362.45


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-10-24_08-29-04
  done: false
  episode_len_mean: 358.29
  episode_media: {}
  episode_reward_max: -2.7299999999999858
  episode_reward_mean: -3.582899999999967
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 3
  episodes_total: 158
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8662583867708842
          entropy_coeff: 0.009999999999999998
          kl: 0.013804730969481425
          policy_loss: 0.05307852774858475
          total_loss: 0.0436277509563499
          vf_explained_var: 0.6569839715957642
          vf_loss: 0.007831333135254682
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,61,1278.13,61000,-3.5829,-2.73,-4.97,358.29


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-10-24_08-29-26
  done: false
  episode_len_mean: 353.92
  episode_media: {}
  episode_reward_max: -2.7299999999999858
  episode_reward_mean: -3.539199999999968
  episode_reward_min: -4.9699999999999385
  episodes_this_iter: 3
  episodes_total: 161
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8311823023690117
          entropy_coeff: 0.009999999999999998
          kl: 0.009739059895931589
          policy_loss: -0.015966878996955022
          total_loss: -0.026600599454508888
          vf_explained_var: 0.7732773423194885
          vf_loss: 0.006704194644569523
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,62,1300.01,62000,-3.5392,-2.73,-4.97,353.92


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-10-24_08-29-47
  done: false
  episode_len_mean: 348.97
  episode_media: {}
  episode_reward_max: -2.7299999999999858
  episode_reward_mean: -3.489699999999969
  episode_reward_min: -4.379999999999951
  episodes_this_iter: 3
  episodes_total: 164
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8307203226619297
          entropy_coeff: 0.009999999999999998
          kl: 0.013578465374648848
          policy_loss: -0.014523006478945414
          total_loss: -0.02267890969912211
          vf_explained_var: 0.7049886584281921
          vf_loss: 0.008793453757082008
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,63,1320.91,63000,-3.4897,-2.73,-4.38,348.97


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-10-24_08-30-09
  done: false
  episode_len_mean: 345.78
  episode_media: {}
  episode_reward_max: -2.7299999999999858
  episode_reward_mean: -3.45779999999997
  episode_reward_min: -4.099999999999957
  episodes_this_iter: 3
  episodes_total: 167
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.770756799644894
          entropy_coeff: 0.009999999999999998
          kl: 0.012562000068824533
          policy_loss: -0.056044003201855555
          total_loss: -0.06341979884439045
          vf_explained_var: 0.6888992190361023
          vf_loss: 0.00907557372831636
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,64,1342.87,64000,-3.4578,-2.73,-4.1,345.78


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-10-24_08-30-30
  done: false
  episode_len_mean: 341.9
  episode_media: {}
  episode_reward_max: -2.7299999999999858
  episode_reward_mean: -3.4189999999999707
  episode_reward_min: -4.039999999999958
  episodes_this_iter: 4
  episodes_total: 171
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.807886016368866
          entropy_coeff: 0.009999999999999998
          kl: 0.012238267718497219
          policy_loss: 0.03253235924575064
          total_loss: 0.023507712615860835
          vf_explained_var: 0.7521864175796509
          vf_loss: 0.00783038576030069
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,65,1364.05,65000,-3.419,-2.73,-4.04,341.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-10-24_08-30-51
  done: false
  episode_len_mean: 339.62
  episode_media: {}
  episode_reward_max: -2.7299999999999858
  episode_reward_mean: -3.3961999999999715
  episode_reward_min: -3.92999999999996
  episodes_this_iter: 3
  episodes_total: 174
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.796355508433448
          entropy_coeff: 0.009999999999999998
          kl: 0.013805927456565965
          policy_loss: -0.004179738296402826
          total_loss: -0.008866153905789058
          vf_explained_var: 0.3493923246860504
          vf_loss: 0.011896547437128093
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,66,1385.11,66000,-3.3962,-2.73,-3.93,339.62


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-10-24_08-31-13
  done: false
  episode_len_mean: 337.49
  episode_media: {}
  episode_reward_max: -2.7299999999999858
  episode_reward_mean: -3.3748999999999723
  episode_reward_min: -3.92999999999996
  episodes_this_iter: 3
  episodes_total: 177
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.811749964290195
          entropy_coeff: 0.009999999999999998
          kl: 0.012457437173675699
          policy_loss: -0.013705792360835606
          total_loss: -0.017697597377830082
          vf_explained_var: 0.4034450948238373
          vf_loss: 0.012879953772709188
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,67,1407.62,67000,-3.3749,-2.73,-3.93,337.49




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-10-24_08-31-53
  done: false
  episode_len_mean: 334.27
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.3426999999999727
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 4
  episodes_total: 181
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7822019947899712
          entropy_coeff: 0.009999999999999998
          kl: 0.01320787361204694
          policy_loss: -0.0024102050397131177
          total_loss: -0.0027154053250948587
          vf_explained_var: 0.2899261713027954
          vf_loss: 0.016196028795093298
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,68,1447.22,68000,-3.3427,-2.49,-3.89,334.27


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-10-24_08-32-16
  done: false
  episode_len_mean: 331.85
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.3184999999999722
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 3
  episodes_total: 184
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7795431719885932
          entropy_coeff: 0.009999999999999998
          kl: 0.006074596159440541
          policy_loss: -0.002547616180446413
          total_loss: -0.008371209767129685
          vf_explained_var: 0.28584325313568115
          vf_loss: 0.011364378526599871
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,69,1470.42,69000,-3.3185,-2.49,-3.89,331.85


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-10-24_08-32-39
  done: false
  episode_len_mean: 329.0
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.2899999999999734
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 4
  episodes_total: 188
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.784897842672136
          entropy_coeff: 0.009999999999999998
          kl: 0.009695851989925468
          policy_loss: -0.001725244190957811
          total_loss: -0.003128023776743147
          vf_explained_var: 0.2168770432472229
          vf_loss: 0.015476612891587947
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,70,1493.41,70000,-3.29,-2.49,-3.89,329


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-10-24_08-33-02
  done: false
  episode_len_mean: 327.51
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.275099999999974
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 3
  episodes_total: 191
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8240939180056255
          entropy_coeff: 0.009999999999999998
          kl: 0.007776384080073247
          policy_loss: 0.020491718583636815
          total_loss: 0.017300126287672253
          vf_explained_var: -0.052982673048973083
          vf_loss: 0.014271708324344622
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,71,1515.91,71000,-3.2751,-2.49,-3.89,327.51


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-10-24_08-33-25
  done: false
  episode_len_mean: 326.0
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.2599999999999745
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 3
  episodes_total: 194
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7852803932295904
          entropy_coeff: 0.009999999999999998
          kl: 0.0069100546582751225
          policy_loss: -0.1025126670797666
          total_loss: -0.10444908936818441
          vf_explained_var: 0.22104428708553314
          vf_loss: 0.015225374719334973
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,72,1539.19,72000,-3.26,-2.49,-3.89,326


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-10-24_08-33-48
  done: false
  episode_len_mean: 324.56
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.2455999999999743
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 4
  episodes_total: 198
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8315850310855442
          entropy_coeff: 0.009999999999999998
          kl: 0.010483736574511265
          policy_loss: 0.05397894299692578
          total_loss: 0.047612876445055005
          vf_explained_var: 0.4256829023361206
          vf_loss: 0.010901412246231404
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,73,1561.91,73000,-3.2456,-2.49,-3.89,324.56


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-10-24_08-34-10
  done: false
  episode_len_mean: 323.64
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.236399999999975
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 3
  episodes_total: 201
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8601400441593594
          entropy_coeff: 0.009999999999999998
          kl: 0.00875263669141396
          policy_loss: -0.09201036476426655
          total_loss: -0.09389106780290604
          vf_explained_var: 0.2356061339378357
          vf_loss: 0.015845431573688985
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,74,1584.52,74000,-3.2364,-2.49,-3.89,323.64


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-24_08-34-33
  done: false
  episode_len_mean: 322.65
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.2264999999999744
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 4
  episodes_total: 205
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.862502442465888
          entropy_coeff: 0.009999999999999998
          kl: 0.011891248014964183
          policy_loss: 0.044365895125601025
          total_loss: 0.03781205258435673
          vf_explained_var: 0.5631396174430847
          vf_loss: 0.010882055417944987
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,75,1606.77,75000,-3.2265,-2.49,-3.89,322.65


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-10-24_08-34-53
  done: false
  episode_len_mean: 322.64
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.226399999999975
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 3
  episodes_total: 208
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8615435653262669
          entropy_coeff: 0.009999999999999998
          kl: 0.01254416172580706
          policy_loss: 0.0928399013976256
          total_loss: 0.08414632769094574
          vf_explained_var: 0.5193389058113098
          vf_loss: 0.008667448300143911
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,76,1626.96,76000,-3.2264,-2.49,-3.89,322.64




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-10-24_08-35-32
  done: false
  episode_len_mean: 321.9
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.2189999999999754
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 3
  episodes_total: 211
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8231226828363207
          entropy_coeff: 0.009999999999999998
          kl: 0.013673045374752175
          policy_loss: 0.007151890463299221
          total_loss: -0.0013140320777893066
          vf_explained_var: 0.7122997045516968
          vf_loss: 0.008398000723941045
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,77,1665.78,77000,-3.219,-2.49,-3.89,321.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-10-24_08-35-55
  done: false
  episode_len_mean: 320.89
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.208899999999976
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 3
  episodes_total: 214
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8486288468043008
          entropy_coeff: 0.009999999999999998
          kl: 0.011301520309505339
          policy_loss: -0.1323500297135777
          total_loss: -0.1298369537625048
          vf_explained_var: 0.27436986565589905
          vf_loss: 0.019869211533417305
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,78,1688.58,78000,-3.2089,-2.49,-3.89,320.89


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-10-24_08-36-17
  done: false
  episode_len_mean: 319.3
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1929999999999756
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 4
  episodes_total: 218
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8469188716676501
          entropy_coeff: 0.009999999999999998
          kl: 0.013920881935998688
          policy_loss: 0.023328382376995353
          total_loss: 0.021239706542756823
          vf_explained_var: 0.5263829827308655
          vf_loss: 0.014988422507627143
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,79,1710.55,79000,-3.193,-2.49,-3.89,319.3


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-10-24_08-36-39
  done: false
  episode_len_mean: 317.98
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1797999999999758
  episode_reward_min: -3.889999999999961
  episodes_this_iter: 3
  episodes_total: 221
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.908127702607049
          entropy_coeff: 0.009999999999999998
          kl: 0.012487058531984967
          policy_loss: -0.04062764619787534
          total_loss: -0.04897925191455417
          vf_explained_var: 0.6751773357391357
          vf_loss: 0.009480965067632496
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,80,1733.02,80000,-3.1798,-2.49,-3.89,317.98


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-10-24_08-36-59
  done: false
  episode_len_mean: 316.86
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.168599999999976
  episode_reward_min: -3.759999999999964
  episodes_this_iter: 3
  episodes_total: 224
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.874155212773217
          entropy_coeff: 0.009999999999999998
          kl: 0.014300264560587313
          policy_loss: 0.07565100838740667
          total_loss: 0.07520383753710323
          vf_explained_var: 0.3609612286090851
          vf_loss: 0.01686435449971921
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,81,1752.94,81000,-3.1686,-2.49,-3.76,316.86


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-10-24_08-37-20
  done: false
  episode_len_mean: 316.03
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.160299999999977
  episode_reward_min: -3.699999999999965
  episodes_this_iter: 3
  episodes_total: 227
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.917638705836402
          entropy_coeff: 0.009999999999999998
          kl: 0.011440639305780959
          policy_loss: 0.044486584266026814
          total_loss: 0.03605242719252904
          vf_explained_var: 0.5517355799674988
          vf_loss: 0.009598164904552202
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,82,1773.45,82000,-3.1603,-2.49,-3.7,316.03


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-10-24_08-37-40
  done: false
  episode_len_mean: 315.56
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.155599999999976
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 230
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.9611210054821437
          entropy_coeff: 0.009999999999999998
          kl: 0.009974431320803234
          policy_loss: 0.04384655331571897
          total_loss: 0.03620958353082339
          vf_explained_var: 0.4519950747489929
          vf_loss: 0.01097679804596636
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,83,1793.65,83000,-3.1556,-2.49,-3.74,315.56


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-10-24_08-38-00
  done: false
  episode_len_mean: 315.66
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1565999999999756
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 233
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8386574109395346
          entropy_coeff: 0.009999999999999998
          kl: 0.01370065814261723
          policy_loss: 0.05642582161558999
          total_loss: 0.04948973672257529
          vf_explained_var: 0.533844530582428
          vf_loss: 0.01008042455650866
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,84,1813.47,84000,-3.1566,-2.49,-3.74,315.66


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-10-24_08-38-21
  done: false
  episode_len_mean: 314.79
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1478999999999773
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 236
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8184290634261238
          entropy_coeff: 0.009999999999999998
          kl: 0.00997481995697653
          policy_loss: 0.041675014048814775
          total_loss: 0.03897348551286591
          vf_explained_var: 0.25501781702041626
          vf_loss: 0.01448527914730625
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,85,1834.39,85000,-3.1479,-2.49,-3.74,314.79


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-10-24_08-38-40
  done: false
  episode_len_mean: 314.82
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1481999999999766
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 239
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8408338136143154
          entropy_coeff: 0.009999999999999998
          kl: 0.007693522874255147
          policy_loss: 0.01983871923552619
          total_loss: 0.013706247011820475
          vf_explained_var: 0.3878909945487976
          vf_loss: 0.011506510820860663
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,86,1853.98,86000,-3.1482,-2.49,-3.74,314.82




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-10-24_08-39-18
  done: false
  episode_len_mean: 313.81
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.138099999999976
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 242
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8364426255226136
          entropy_coeff: 0.009999999999999998
          kl: 0.011351833709150336
          policy_loss: 0.01928789516290029
          total_loss: 0.015416435731781853
          vf_explained_var: 0.17821598052978516
          vf_loss: 0.013357785010399918
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,87,1892.28,87000,-3.1381,-2.49,-3.74,313.81


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-10-24_08-39-40
  done: false
  episode_len_mean: 312.66
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1265999999999767
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 245
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7876235485076903
          entropy_coeff: 0.009999999999999998
          kl: 0.008719502842163607
          policy_loss: 0.051985925436019896
          total_loss: 0.046137358910507624
          vf_explained_var: 0.3317962884902954
          vf_loss: 0.01115572091528318
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,88,1913.93,88000,-3.1266,-2.49,-3.74,312.66


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-10-24_08-40-00
  done: false
  episode_len_mean: 312.01
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1200999999999777
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 248
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.8017919487423366
          entropy_coeff: 0.009999999999999998
          kl: 0.008296675016718937
          policy_loss: 0.08001318987872866
          total_loss: 0.07469533731540044
          vf_explained_var: 0.30294641852378845
          vf_loss: 0.011870399317962842
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,89,1934.18,89000,-3.1201,-2.49,-3.74,312.01


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-10-24_08-40-21
  done: false
  episode_len_mean: 312.42
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.124199999999978
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 251
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7654356956481934
          entropy_coeff: 0.009999999999999998
          kl: 0.007985027576535625
          policy_loss: 0.0678437708152665
          total_loss: 0.06337318122386933
          vf_explained_var: 0.25988438725471497
          vf_loss: 0.012385268609634497
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,90,1954.55,90000,-3.1242,-2.49,-3.74,312.42


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-10-24_08-40-41
  done: false
  episode_len_mean: 313.14
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.131399999999976
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 254
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.772431610690223
          entropy_coeff: 0.009999999999999998
          kl: 0.008771190760365815
          policy_loss: 0.04812655862834719
          total_loss: 0.044475344651275214
          vf_explained_var: 0.31879922747612
          vf_loss: 0.013195982688598128
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,91,1974.43,91000,-3.1314,-2.49,-3.74,313.14


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-10-24_08-41-02
  done: false
  episode_len_mean: 313.13
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1312999999999764
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 257
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7936824599901835
          entropy_coeff: 0.009999999999999998
          kl: 0.013032018755557059
          policy_loss: 0.03703169888920254
          total_loss: 0.032268248664008244
          vf_explained_var: 0.37421104311943054
          vf_loss: 0.011870173352912792
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,92,1996.1,92000,-3.1313,-2.49,-3.74,313.13


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-10-24_08-41-24
  done: false
  episode_len_mean: 313.56
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.135599999999977
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 260
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7974641429053413
          entropy_coeff: 0.009999999999999998
          kl: 0.012303739011352772
          policy_loss: 0.035120577861865364
          total_loss: 0.02901088289088673
          vf_explained_var: 0.4027714729309082
          vf_loss: 0.010634573142225337
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,93,2017.35,93000,-3.1356,-2.49,-3.74,313.56


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-10-24_08-41-45
  done: false
  episode_len_mean: 313.65
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1364999999999763
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 263
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7205819765726724
          entropy_coeff: 0.009999999999999998
          kl: 0.010975900694352544
          policy_loss: 0.06479984828167491
          total_loss: 0.05510880707038773
          vf_explained_var: 0.3376244008541107
          vf_loss: 0.006417188030253682
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,94,2038.85,94000,-3.1365,-2.49,-3.74,313.65


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-10-24_08-42-06
  done: false
  episode_len_mean: 313.79
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.137899999999977
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 266
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6749282505777148
          entropy_coeff: 0.009999999999999998
          kl: 0.00760371479244976
          policy_loss: -0.0358060612446732
          total_loss: -0.03806615066197183
          vf_explained_var: 0.1269427239894867
          vf_loss: 0.01372882093095945
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,95,2060.09,95000,-3.1379,-2.49,-3.74,313.79


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-10-24_08-42-28
  done: false
  episode_len_mean: 314.18
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1417999999999764
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 269
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.7101621945699057
          entropy_coeff: 0.009999999999999998
          kl: 0.006984580369274498
          policy_loss: -0.112366640733348
          total_loss: -0.11453491035434935
          vf_explained_var: 0.2773580849170685
          vf_loss: 0.014234894338167376
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,96,2081.23,96000,-3.1418,-2.49,-3.74,314.18




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-10-24_08-43-07
  done: false
  episode_len_mean: 313.55
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1354999999999764
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 4
  episodes_total: 273
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6462408728069728
          entropy_coeff: 0.009999999999999998
          kl: 0.009830532614694536
          policy_loss: 0.008317708472410838
          total_loss: 0.00789637714624405
          vf_explained_var: 0.21232418715953827
          vf_loss: 0.015058023958570428
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,97,2120.72,97000,-3.1355,-2.49,-3.74,313.55


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-10-24_08-43-29
  done: false
  episode_len_mean: 313.53
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1352999999999764
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 276
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5951176603635153
          entropy_coeff: 0.009999999999999998
          kl: 0.008278628382715465
          policy_loss: 0.04997204376591576
          total_loss: 0.046647879315747154
          vf_explained_var: 0.22184894979000092
          vf_loss: 0.011799148177184785
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,98,2142.97,98000,-3.1353,-2.49,-3.74,313.53


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-10-24_08-43-50
  done: false
  episode_len_mean: 314.18
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1417999999999755
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 279
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.6274190770255195
          entropy_coeff: 0.009999999999999998
          kl: 0.010129371840926953
          policy_loss: -0.04981508155663808
          total_loss: -0.05344880289501614
          vf_explained_var: 0.26917564868927
          vf_loss: 0.011627535482855618
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,99,2163.73,99000,-3.1418,-2.49,-3.74,314.18


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-10-24_08-44-12
  done: false
  episode_len_mean: 314.96
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.149599999999976
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 282
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.606866811381446
          entropy_coeff: 0.009999999999999998
          kl: 0.010270151999380575
          policy_loss: -0.10105655954943762
          total_loss: -0.10047691820396318
          vf_explained_var: 0.03916563093662262
          vf_loss: 0.01562129601629244
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,100,2185.48,100000,-3.1496,-2.72,-3.74,314.96


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-10-24_08-44-33
  done: false
  episode_len_mean: 315.77
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.1576999999999766
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 4
  episodes_total: 286
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.589032452636295
          entropy_coeff: 0.009999999999999998
          kl: 0.007813415986917881
          policy_loss: 0.023422385710808964
          total_loss: 0.023287218891912036
          vf_explained_var: 0.07909048348665237
          vf_loss: 0.014973815851327446
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,101,2206.17,101000,-3.1577,-2.72,-3.74,315.77


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-10-24_08-44-53
  done: false
  episode_len_mean: 316.89
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.168899999999976
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 289
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5389278279410468
          entropy_coeff: 0.009999999999999998
          kl: 0.013799118568206679
          policy_loss: 0.062084410753515035
          total_loss: 0.05863683389292823
          vf_explained_var: 0.008233300410211086
          vf_loss: 0.010561791146109398
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,102,2226.51,102000,-3.1689,-2.72,-3.74,316.89


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-10-24_08-45-15
  done: false
  episode_len_mean: 317.07
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.170699999999976
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 292
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5248416211869982
          entropy_coeff: 0.009999999999999998
          kl: 0.005710764211398909
          policy_loss: 0.05736920568678114
          total_loss: 0.05433366596698761
          vf_explained_var: 0.050971921533346176
          vf_loss: 0.01164180071791634
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,103,2248.83,103000,-3.1707,-2.72,-3.74,317.07


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-10-24_08-45-36
  done: false
  episode_len_mean: 317.72
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.1771999999999765
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 295
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.5029194010628595
          entropy_coeff: 0.009999999999999998
          kl: 0.010517524258031812
          policy_loss: -0.05314660651816262
          total_loss: -0.05219640897379981
          vf_explained_var: 0.0041991122998297215
          vf_loss: 0.01492763968805472
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,104,2269.96,104000,-3.1772,-2.72,-3.74,317.72


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-10-24_08-45-59
  done: false
  episode_len_mean: 317.85
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.1784999999999752
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 4
  episodes_total: 299
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09999999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 1.512626404232449
          entropy_coeff: 0.009999999999999998
          kl: 0.0032040618836819313
          policy_loss: 0.013867036832703485
          total_loss: 0.016131694118181866
          vf_explained_var: 0.06377854943275452
          vf_loss: 0.01707051406717963
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,105,2292.61,105000,-3.1785,-2.72,-3.74,317.85




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-10-24_08-46-41
  done: false
  episode_len_mean: 317.38
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.173799999999976
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 302
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5499297314220004
          entropy_coeff: 0.009999999999999998
          kl: 0.008071487220084035
          policy_loss: 0.002596844070487552
          total_loss: -0.0012052971455785964
          vf_explained_var: 0.2166038453578949
          vf_loss: 0.01129358231685021
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,106,2334.33,106000,-3.1738,-2.39,-3.74,317.38


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-10-24_08-47-04
  done: false
  episode_len_mean: 316.43
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.1642999999999764
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 4
  episodes_total: 306
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5979947527249654
          entropy_coeff: 0.009999999999999998
          kl: 0.009725948693696139
          policy_loss: 0.016769379874070487
          total_loss: 0.01494353852338261
          vf_explained_var: 0.3012915551662445
          vf_loss: 0.013667807386567195
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,107,2357.31,107000,-3.1643,-2.39,-3.74,316.43


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-10-24_08-47-26
  done: false
  episode_len_mean: 315.21
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.1520999999999764
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 309
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5726515094439188
          entropy_coeff: 0.009999999999999998
          kl: 0.009904759759418229
          policy_loss: 0.040094629757934146
          total_loss: 0.030706160763899485
          vf_explained_var: 0.5486465692520142
          vf_loss: 0.005842808480115814
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,108,2379.9,108000,-3.1521,-2.39,-3.74,315.21


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-10-24_08-47-49
  done: false
  episode_len_mean: 315.11
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.1510999999999765
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 4
  episodes_total: 313
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5890247013833787
          entropy_coeff: 0.009999999999999998
          kl: 0.008981062194893339
          policy_loss: 0.030209820303652022
          total_loss: 0.030097718867990706
          vf_explained_var: 0.4187091290950775
          vf_loss: 0.01532909314458569
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,109,2402.27,109000,-3.1511,-2.39,-3.74,315.11


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-10-24_08-48-11
  done: false
  episode_len_mean: 315.09
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.1508999999999765
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 316
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.679560375213623
          entropy_coeff: 0.009999999999999998
          kl: 0.00954493927131534
          policy_loss: 0.035571161243650645
          total_loss: 0.02958140356673135
          vf_explained_var: 0.5522876977920532
          vf_loss: 0.010328598268097266
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,110,2424.15,110000,-3.1509,-2.39,-3.74,315.09


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-10-24_08-48-32
  done: false
  episode_len_mean: 315.64
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.1563999999999766
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 319
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.696572204430898
          entropy_coeff: 0.009999999999999998
          kl: 0.015785457157608027
          policy_loss: 0.005114524728722042
          total_loss: 0.0017353615827030607
          vf_explained_var: 0.4749094247817993
          vf_loss: 0.012797282918149399
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,111,2445.24,111000,-3.1564,-2.39,-3.74,315.64


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-10-24_08-48-53
  done: false
  episode_len_mean: 316.52
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.165199999999976
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 322
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6839678353733487
          entropy_coeff: 0.009999999999999998
          kl: 0.013764360560417464
          policy_loss: -0.014123290156324705
          total_loss: -0.016311274303330316
          vf_explained_var: 0.3240639269351959
          vf_loss: 0.01396347563713789
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,112,2466.4,112000,-3.1652,-2.39,-3.74,316.52


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-10-24_08-49-14
  done: false
  episode_len_mean: 315.65
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.156499999999976
  episode_reward_min: -3.7399999999999642
  episodes_this_iter: 3
  episodes_total: 325
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6224944922659132
          entropy_coeff: 0.009999999999999998
          kl: 0.012492815251706466
          policy_loss: -0.09686867656807105
          total_loss: -0.09479828737676144
          vf_explained_var: 0.21790704131126404
          vf_loss: 0.0176706966633598
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,113,2486.99,113000,-3.1565,-2.39,-3.74,315.65


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-10-24_08-49-36
  done: false
  episode_len_mean: 314.19
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.1418999999999766
  episode_reward_min: -3.7199999999999647
  episodes_this_iter: 4
  episodes_total: 329
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.6032930705282422
          entropy_coeff: 0.009999999999999998
          kl: 0.008145970624320419
          policy_loss: -0.015637757173842855
          total_loss: -0.0142878041913112
          vf_explained_var: 0.04749152809381485
          vf_loss: 0.016975581459701062
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,114,2508.94,114000,-3.1419,-2.39,-3.72,314.19




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-10-24_08-50-14
  done: false
  episode_len_mean: 313.21
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.1320999999999772
  episode_reward_min: -3.7199999999999647
  episodes_this_iter: 3
  episodes_total: 332
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.600946275393168
          entropy_coeff: 0.009999999999999998
          kl: 0.011185916897121393
          policy_loss: 0.04411390937036938
          total_loss: 0.04018847288356887
          vf_explained_var: 0.1886018067598343
          vf_loss: 0.011524732452946612
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,115,2547.76,115000,-3.1321,-2.39,-3.72,313.21


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-10-24_08-50-36
  done: false
  episode_len_mean: 311.93
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.119299999999977
  episode_reward_min: -3.709999999999965
  episodes_this_iter: 3
  episodes_total: 335
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5212813072734408
          entropy_coeff: 0.009999999999999998
          kl: 0.010834865573946039
          policy_loss: 0.01840950118170844
          total_loss: 0.01515498459339142
          vf_explained_var: 0.05161081254482269
          vf_loss: 0.011416552677918744
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,116,2569.2,116000,-3.1193,-2.39,-3.71,311.93


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-10-24_08-50-57
  done: false
  episode_len_mean: 311.44
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.114399999999977
  episode_reward_min: -3.6899999999999653
  episodes_this_iter: 3
  episodes_total: 338
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.544397058751848
          entropy_coeff: 0.009999999999999998
          kl: 0.006581315336038429
          policy_loss: 0.007891623179117839
          total_loss: 0.0050626706745889455
          vf_explained_var: 0.01625249907374382
          vf_loss: 0.012285952527438186
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,117,2590.08,117000,-3.1144,-2.39,-3.69,311.44


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-10-24_08-51-17
  done: false
  episode_len_mean: 311.7
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.1169999999999773
  episode_reward_min: -3.6899999999999653
  episodes_this_iter: 3
  episodes_total: 341
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5839109235339695
          entropy_coeff: 0.009999999999999998
          kl: 0.008720769846776477
          policy_loss: -0.09397589597437117
          total_loss: -0.09354680197106467
          vf_explained_var: 0.06265605241060257
          vf_loss: 0.015832163848810726
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,118,2610.35,118000,-3.117,-2.39,-3.69,311.7


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-10-24_08-51-37
  done: false
  episode_len_mean: 311.79
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.1178999999999775
  episode_reward_min: -3.6899999999999653
  episodes_this_iter: 3
  episodes_total: 344
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.5088275816705492
          entropy_coeff: 0.009999999999999998
          kl: 0.017226692240642195
          policy_loss: -0.11249543925126394
          total_loss: -0.1123393072022332
          vf_explained_var: 0.17377817630767822
          vf_loss: 0.014383074672271809
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,119,2630.33,119000,-3.1179,-2.39,-3.69,311.79


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-10-24_08-51-59
  done: false
  episode_len_mean: 311.5
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.114999999999978
  episode_reward_min: -3.6899999999999653
  episodes_this_iter: 3
  episodes_total: 347
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.468955041302575
          entropy_coeff: 0.009999999999999998
          kl: 0.013009816499564881
          policy_loss: -0.13142916940980487
          total_loss: -0.13084577959444788
          vf_explained_var: 0.19104112684726715
          vf_loss: 0.014622450371583303
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,120,2651.8,120000,-3.115,-2.39,-3.69,311.5


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-10-24_08-52-20
  done: false
  episode_len_mean: 310.13
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.101299999999978
  episode_reward_min: -3.6899999999999653
  episodes_this_iter: 4
  episodes_total: 351
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3730272120899625
          entropy_coeff: 0.009999999999999998
          kl: 0.011765386971199129
          policy_loss: 0.03987538640697797
          total_loss: 0.038018659502267835
          vf_explained_var: 0.15319298207759857
          vf_loss: 0.011285273084003065
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,121,2673.48,121000,-3.1013,-2.39,-3.69,310.13


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-10-24_08-52-43
  done: false
  episode_len_mean: 308.58
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.085799999999978
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 354
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.3024946636623806
          entropy_coeff: 0.009999999999999998
          kl: 0.007516454506898442
          policy_loss: 0.045140905098782644
          total_loss: 0.040480034136109885
          vf_explained_var: 0.2759151756763458
          vf_loss: 0.007988252574836627
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,122,2696.06,122000,-3.0858,-2.39,-3.5,308.58


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-10-24_08-53-07
  done: false
  episode_len_mean: 306.63
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.0662999999999783
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 358
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.1853251973787944
          entropy_coeff: 0.009999999999999998
          kl: 0.009324330988134713
          policy_loss: 0.010402016590038936
          total_loss: 0.014479472984870274
          vf_explained_var: 0.12596666812896729
          vf_loss: 0.015464491604102983
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,123,2719.71,123000,-3.0663,-2.39,-3.5,306.63




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-10-24_08-53-49
  done: false
  episode_len_mean: 304.46
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.0445999999999787
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 362
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2571058048142327
          entropy_coeff: 0.009999999999999998
          kl: 0.005870147967576905
          policy_loss: 0.01294346335861418
          total_loss: 0.013845172441667981
          vf_explained_var: 0.33719679713249207
          vf_loss: 0.013179261620259947
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,124,2762.31,124000,-3.0446,-2.39,-3.5,304.46


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-10-24_08-54-14
  done: false
  episode_len_mean: 303.2
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.0319999999999796
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 365
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.1044425262345208
          entropy_coeff: 0.009999999999999998
          kl: 0.009241080531303207
          policy_loss: -0.056386112421751025
          total_loss: -0.055919013255172303
          vf_explained_var: 0.30345505475997925
          vf_loss: 0.011049470584839582
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,125,2786.95,125000,-3.032,-2.39,-3.5,303.2


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-10-24_08-54-38
  done: false
  episode_len_mean: 301.28
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -3.01279999999998
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 369
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.123243839210934
          entropy_coeff: 0.009999999999999998
          kl: 0.007881958498559606
          policy_loss: 0.009612794468800227
          total_loss: 0.009844052212105856
          vf_explained_var: 0.20023944973945618
          vf_loss: 0.011069597327150404
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,126,2811,126000,-3.0128,-2.39,-3.5,301.28


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-10-24_08-55-03
  done: false
  episode_len_mean: 299.6
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.9959999999999805
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 373
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.0491707583268484
          entropy_coeff: 0.009999999999999998
          kl: 0.005694415545187641
          policy_loss: 0.02298137843608856
          total_loss: 0.026886275907357534
          vf_explained_var: 0.2495683878660202
          vf_loss: 0.014111885024855534
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,127,2836.28,127000,-2.996,-2.39,-3.5,299.6


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-10-24_08-55-30
  done: false
  episode_len_mean: 297.85
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.978499999999981
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 377
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.1118462867206997
          entropy_coeff: 0.009999999999999998
          kl: 0.007768919009734917
          policy_loss: 0.021045209136274126
          total_loss: 0.026151558922396765
          vf_explained_var: 0.10495080053806305
          vf_loss: 0.01583636553130216
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,128,2863.18,128000,-2.9785,-2.39,-3.5,297.85


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-10-24_08-55-56
  done: false
  episode_len_mean: 295.59
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.95589999999998
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 381
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2326341536309984
          entropy_coeff: 0.009999999999999998
          kl: 0.008230695940908727
          policy_loss: -0.010741967583696048
          total_loss: -0.007777122408151626
          vf_explained_var: 0.2009439915418625
          vf_loss: 0.014879653064741028
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,129,2889.29,129000,-2.9559,-2.39,-3.5,295.59


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-10-24_08-56-21
  done: false
  episode_len_mean: 294.79
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.9478999999999815
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 384
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.2679143243365818
          entropy_coeff: 0.009999999999999998
          kl: 0.007032834181557254
          policy_loss: -0.0036859283016787633
          total_loss: -0.007553822878334258
          vf_explained_var: 0.44428351521492004
          vf_loss: 0.008459605566329426
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 1300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,130,2913.76,130000,-2.9479,-2.39,-3.5,294.79


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-10-24_08-56-45
  done: false
  episode_len_mean: 292.69
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.926899999999981
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 388
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.1959870828522576
          entropy_coeff: 0.009999999999999998
          kl: 0.006773082613422766
          policy_loss: 0.020623510165346994
          total_loss: 0.020021780497497983
          vf_explained_var: 0.4884040057659149
          vf_loss: 0.011019484791904687
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,131,2938.4,131000,-2.9269,-2.39,-3.5,292.69




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-10-24_08-57-28
  done: false
  episode_len_mean: 290.98
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.9097999999999815
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 392
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.17236524687873
          entropy_coeff: 0.009999999999999998
          kl: 0.014280831464861551
          policy_loss: 0.02078663408756256
          total_loss: 0.019469421522484886
          vf_explained_var: 0.6619657874107361
          vf_loss: 0.009692398277628753
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,132,2980.56,132000,-2.9098,-2.39,-3.5,290.98


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-10-24_08-57-53
  done: false
  episode_len_mean: 289.63
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.896299999999982
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 395
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.106015051735772
          entropy_coeff: 0.009999999999999998
          kl: 0.008772367645833196
          policy_loss: -0.09768587127327918
          total_loss: -0.099592900607321
          vf_explained_var: 0.6507740020751953
          vf_loss: 0.008714503349943293
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,133,3005.88,133000,-2.8963,-2.39,-3.5,289.63


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-10-24_08-58-17
  done: false
  episode_len_mean: 288.55
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.8854999999999817
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 399
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.1084800958633423
          entropy_coeff: 0.009999999999999998
          kl: 0.008688004926471276
          policy_loss: -0.01708822896083196
          total_loss: -0.020925074484613206
          vf_explained_var: 0.761309027671814
          vf_loss: 0.0068135530222207304
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,134,3030.26,134000,-2.8855,-2.39,-3.5,288.55


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-10-24_08-58-43
  done: false
  episode_len_mean: 287.84
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -2.8783999999999828
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 403
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.1261211680041419
          entropy_coeff: 0.009999999999999998
          kl: 0.010880533286088104
          policy_loss: 0.031288726462258235
          total_loss: 0.027126348349783155
          vf_explained_var: 0.8040520548820496
          vf_loss: 0.0065548059013154775
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,135,3056.18,135000,-2.8784,-2.45,-3.5,287.84


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-10-24_08-59-10
  done: false
  episode_len_mean: 286.84
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -2.8683999999999834
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 407
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.9526654150750902
          entropy_coeff: 0.009999999999999998
          kl: 0.005081121499039851
          policy_loss: 0.014915872696373197
          total_loss: 0.014313240266508527
          vf_explained_var: 0.718739926815033
          vf_loss: 0.008669964094749755
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,136,3083.09,136000,-2.8684,-2.45,-3.5,286.84


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-10-24_08-59-36
  done: false
  episode_len_mean: 285.44
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -2.8543999999999836
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 411
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.9786177575588226
          entropy_coeff: 0.009999999999999998
          kl: 0.005069556054616698
          policy_loss: 0.0008985062336756124
          total_loss: -0.0009457790189319186
          vf_explained_var: 0.7529829144477844
          vf_loss: 0.007688414145054089
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 1370

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,137,3108.62,137000,-2.8544,-2.45,-3.5,285.44


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-10-24_09-00-02
  done: false
  episode_len_mean: 283.96
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.8395999999999835
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 415
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.0288697216245863
          entropy_coeff: 0.009999999999999998
          kl: 0.009927026415945958
          policy_loss: -0.029295032719771068
          total_loss: -0.03150775987241003
          vf_explained_var: 0.7295660972595215
          vf_loss: 0.0075796175437668955
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,138,3135.08,138000,-2.8396,-2.43,-3.5,283.96


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-10-24_09-00-29
  done: false
  episode_len_mean: 282.5
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -2.8249999999999833
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 418
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.9602206753359901
          entropy_coeff: 0.009999999999999998
          kl: 0.007784465293064989
          policy_loss: -0.11132673728797171
          total_loss: -0.10843348585897022
          vf_explained_var: 0.46720030903816223
          vf_loss: 0.012106234973503483
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,139,3161.75,139000,-2.825,-2.43,-3.5,282.5




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-10-24_09-01-13
  done: false
  episode_len_mean: 279.87
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.7986999999999838
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 422
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.900942196448644
          entropy_coeff: 0.009999999999999998
          kl: 0.015260974401793071
          policy_loss: -0.11194546181294653
          total_loss: -0.1055663089785311
          vf_explained_var: 0.24134230613708496
          vf_loss: 0.014625527513109976
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 140000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,140,3206.27,140000,-2.7987,-2.38,-3.5,279.87


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-10-24_09-01-40
  done: false
  episode_len_mean: 277.42
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.7741999999999845
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 426
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.7526913073327807
          entropy_coeff: 0.009999999999999998
          kl: 0.009188398613931762
          policy_loss: 0.00014875630537668864
          total_loss: 0.004975490934318967
          vf_explained_var: 0.13116449117660522
          vf_loss: 0.011894228485309416
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 1410

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,141,3232.64,141000,-2.7742,-2.38,-3.5,277.42


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-10-24_09-02-06
  done: false
  episode_len_mean: 275.9
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.758999999999985
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 430
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04999999999999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.8290146423710717
          entropy_coeff: 0.009999999999999998
          kl: 0.025204010000892098
          policy_loss: 0.007591059721178479
          total_loss: 0.011527974986367755
          vf_explained_var: 0.23396944999694824
          vf_loss: 0.010966860440870125
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,142,3258.27,142000,-2.759,-2.38,-3.5,275.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-10-24_09-02-32
  done: false
  episode_len_mean: 273.33
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.7332999999999856
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 434
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.075
          cur_lr: 5.000000000000001e-05
          entropy: 0.7511215455002255
          entropy_coeff: 0.009999999999999998
          kl: 0.004559217487291751
          policy_loss: 0.018376136736737356
          total_loss: 0.023949508782890108
          vf_explained_var: 0.10433515161275864
          vf_loss: 0.012742646514541572
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,143,3284.55,143000,-2.7333,-2.38,-3.41,273.33


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-10-24_09-02-57
  done: false
  episode_len_mean: 271.01
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.7100999999999855
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 438
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 0.8860604650444455
          entropy_coeff: 0.009999999999999998
          kl: 0.018466493924831046
          policy_loss: 0.030947652873065737
          total_loss: 0.03583263680338859
          vf_explained_var: 0.04825029894709587
          vf_loss: 0.013053096416923735
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,144,3309.9,144000,-2.7101,-2.38,-3.41,271.01


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-10-24_09-03-21
  done: false
  episode_len_mean: 270.03
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.7002999999999866
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 3
  episodes_total: 441
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 1.011917761961619
          entropy_coeff: 0.009999999999999998
          kl: 0.006556323067319722
          policy_loss: -0.08800672325823042
          total_loss: -0.08446644345919291
          vf_explained_var: 0.05774659663438797
          vf_loss: 0.01341359620499942
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,145,3333.56,145000,-2.7003,-2.38,-3.41,270.03


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-10-24_09-03-43
  done: false
  episode_len_mean: 268.84
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.688399999999987
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 4
  episodes_total: 445
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1325374046961467
          entropy_coeff: 0.009999999999999998
          kl: 0.009138094658653036
          policy_loss: 0.01607730620437198
          total_loss: 0.019230373203754425
          vf_explained_var: 0.014814731664955616
          vf_loss: 0.014135762624856498
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,146,3355.75,146000,-2.6884,-2.38,-3.31,268.84


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-10-24_09-04-04
  done: false
  episode_len_mean: 269.47
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.6946999999999868
  episode_reward_min: -3.3099999999999734
  episodes_this_iter: 3
  episodes_total: 448
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2823309752676222
          entropy_coeff: 0.009999999999999998
          kl: 0.011731490006358552
          policy_loss: 0.05584626777304543
          total_loss: 0.05366210291783015
          vf_explained_var: -0.09554627537727356
          vf_loss: 0.010199212207387771
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,147,3376.54,147000,-2.6947,-2.38,-3.31,269.47




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-10-24_09-04-38
  done: false
  episode_len_mean: 270.24
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.7023999999999866
  episode_reward_min: -3.6799999999999655
  episodes_this_iter: 2
  episodes_total: 450
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2221248745918274
          entropy_coeff: 0.009999999999999998
          kl: 0.011946985915296754
          policy_loss: -0.08011542740795348
          total_loss: -0.08120681328905953
          vf_explained_var: -0.11666162312030792
          vf_loss: 0.010681849233353406
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,148,3410.89,148000,-2.7024,-2.38,-3.68,270.24


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-10-24_09-04-55
  done: false
  episode_len_mean: 273.64
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.7363999999999855
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 2
  episodes_total: 452
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2600433667500814
          entropy_coeff: 0.009999999999999998
          kl: 0.01634961154366847
          policy_loss: -0.089965168469482
          total_loss: -0.09032602839999729
          vf_explained_var: -0.11453621834516525
          vf_loss: 0.011626463195231434
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,149,3427.85,149000,-2.7364,-2.38,-4.73,273.64


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-10-24_09-05-11
  done: false
  episode_len_mean: 278.77
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.7876999999999845
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 455
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2808010511928134
          entropy_coeff: 0.009999999999999998
          kl: 0.011227994159608635
          policy_loss: 0.04595045215553707
          total_loss: 0.045727734764417015
          vf_explained_var: -0.3287859261035919
          vf_loss: 0.012164244614218155
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 150000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,150,3443.27,150000,-2.7877,-2.38,-4.73,278.77


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-10-24_09-05-28
  done: false
  episode_len_mean: 281.03
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.810299999999984
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 2
  episodes_total: 457
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 1.241976147227817
          entropy_coeff: 0.009999999999999998
          kl: 0.0056784622792363845
          policy_loss: -0.08178992966810862
          total_loss: -0.08212032914161682
          vf_explained_var: -0.11438853293657303
          vf_loss: 0.011876414831688938
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 151000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,151,3460.93,151000,-2.8103,-2.38,-4.73,281.03


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-10-24_09-05-45
  done: false
  episode_len_mean: 285.29
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.8528999999999836
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 460
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2491137305895488
          entropy_coeff: 0.009999999999999998
          kl: 0.00986277427222871
          policy_loss: 0.04279180781708823
          total_loss: 0.043525944898525876
          vf_explained_var: -0.23795059323310852
          vf_loss: 0.012855418850409073
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,152,3477.81,152000,-2.8529,-2.38,-4.73,285.29


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-10-24_09-06-02
  done: false
  episode_len_mean: 288.59
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.885899999999982
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 2
  episodes_total: 462
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 1.197713757885827
          entropy_coeff: 0.009999999999999998
          kl: 0.010020651300555036
          policy_loss: 0.07192905180984073
          total_loss: 0.06729415473010805
          vf_explained_var: -0.2729989290237427
          vf_loss: 0.0069664658722306035
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,153,3494.3,153000,-2.8859,-2.38,-4.73,288.59


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-10-24_09-06-19
  done: false
  episode_len_mean: 292.18
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.921799999999982
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 465
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 1.181081438064575
          entropy_coeff: 0.009999999999999998
          kl: 0.008067247039282598
          policy_loss: 0.10493196133110258
          total_loss: 0.09974751671155294
          vf_explained_var: -0.32083261013031006
          vf_loss: 0.006323847813392704
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,154,3511.92,154000,-2.9218,-2.38,-4.73,292.18


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-10-24_09-06-38
  done: false
  episode_len_mean: 293.88
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.938799999999981
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 2
  episodes_total: 467
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1281354639265273
          entropy_coeff: 0.009999999999999998
          kl: 0.012504591835510049
          policy_loss: -0.09210888031456206
          total_loss: -0.08969123860200247
          vf_explained_var: -0.07564524561166763
          vf_loss: 0.013230076648889937
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,155,3530.73,155000,-2.9388,-2.38,-4.73,293.88


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-10-24_09-06-58
  done: false
  episode_len_mean: 296.08
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.9607999999999812
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 470
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0375
          cur_lr: 5.000000000000001e-05
          entropy: 1.0775548974672953
          entropy_coeff: 0.009999999999999998
          kl: 0.004991068509878034
          policy_loss: -0.10388831131988102
          total_loss: -0.09597747863994704
          vf_explained_var: 0.012473915703594685
          vf_loss: 0.01849921587854624
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 156000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,156,3550.61,156000,-2.9608,-2.38,-4.73,296.08


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-10-24_09-07-20
  done: false
  episode_len_mean: 297.84
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -2.9783999999999797
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 473
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01875
          cur_lr: 5.000000000000001e-05
          entropy: 1.0146641665034823
          entropy_coeff: 0.009999999999999998
          kl: 0.007462867987630596
          policy_loss: -0.10316513925790786
          total_loss: -0.09656894720262951
          vf_explained_var: 0.10033362358808517
          vf_loss: 0.016602904980795252
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,157,3572.05,157000,-2.9784,-2.38,-4.73,297.84


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-10-24_09-07-40
  done: false
  episode_len_mean: 300.04
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.0003999999999804
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 476
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01875
          cur_lr: 5.000000000000001e-05
          entropy: 1.025025184949239
          entropy_coeff: 0.009999999999999998
          kl: 0.007344110013530683
          policy_loss: -0.06842400183280309
          total_loss: -0.06517508890893724
          vf_explained_var: -0.06521070003509521
          vf_loss: 0.013361458423443967
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 158000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,158,3592.32,158000,-3.0004,-2.38,-4.73,300.04


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-10-24_09-08-00
  done: false
  episode_len_mean: 302.62
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.02619999999998
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 479
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01875
          cur_lr: 5.000000000000001e-05
          entropy: 1.060323581430647
          entropy_coeff: 0.009999999999999998
          kl: 0.006311707507109471
          policy_loss: -0.10020561367273331
          total_loss: -0.09388911359839969
          vf_explained_var: 0.04983319342136383
          vf_loss: 0.01680138897564676
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 159000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,159,3612.79,159000,-3.0262,-2.38,-4.73,302.62




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-10-24_09-08-36
  done: false
  episode_len_mean: 305.99
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.059899999999979
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 482
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01875
          cur_lr: 5.000000000000001e-05
          entropy: 1.1615265594588386
          entropy_coeff: 0.009999999999999998
          kl: 0.005622322799203328
          policy_loss: 0.07957392276989089
          total_loss: 0.07720018633537823
          vf_explained_var: -0.28716039657592773
          vf_loss: 0.00913611187457314
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 160000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,160,3648.81,160000,-3.0599,-2.38,-4.73,305.99


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-10-24_09-08-53
  done: false
  episode_len_mean: 309.08
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.090799999999979
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 2
  episodes_total: 484
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01875
          cur_lr: 5.000000000000001e-05
          entropy: 1.1781876378589207
          entropy_coeff: 0.009999999999999998
          kl: 0.003393209346453416
          policy_loss: -0.0741748109459877
          total_loss: -0.07329014903969235
          vf_explained_var: -0.09304490685462952
          vf_loss: 0.012602916312688547
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 161000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,161,3665.71,161000,-3.0908,-2.38,-4.73,309.08


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-10-24_09-09-12
  done: false
  episode_len_mean: 312.79
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.1278999999999773
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 487
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2120215243763395
          entropy_coeff: 0.009999999999999998
          kl: 0.012682394740858131
          policy_loss: 0.03932982443107499
          total_loss: 0.0397860007153617
          vf_explained_var: -0.180496484041214
          vf_loss: 0.012457495905497733
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 162000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,162,3684.49,162000,-3.1279,-2.38,-4.73,312.79


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-10-24_09-09-32
  done: false
  episode_len_mean: 315.27
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.1526999999999763
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 490
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1910756985346476
          entropy_coeff: 0.009999999999999998
          kl: 0.012994168278121521
          policy_loss: 0.05151348014672597
          total_loss: 0.05133034620020124
          vf_explained_var: -0.1762692928314209
          vf_loss: 0.011605805008568698
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 163000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,163,3704.49,163000,-3.1527,-2.38,-4.73,315.27


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-10-24_09-09-52
  done: false
  episode_len_mean: 317.46
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.174599999999976
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 493
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009375
          cur_lr: 5.000000000000001e-05
          entropy: 1.239028059111701
          entropy_coeff: 0.009999999999999998
          kl: 0.009107887088689322
          policy_loss: 0.05464886873960495
          total_loss: 0.05444408257802327
          vf_explained_var: -0.019777506589889526
          vf_loss: 0.012100107341797815
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,164,3724.11,164000,-3.1746,-2.38,-4.73,317.46


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-10-24_09-10-09
  done: false
  episode_len_mean: 320.04
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.2003999999999757
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 2
  episodes_total: 495
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009375
          cur_lr: 5.000000000000001e-05
          entropy: 1.2071121626430088
          entropy_coeff: 0.009999999999999998
          kl: 0.014521143672725619
          policy_loss: -0.0897514369752672
          total_loss: -0.08956130676799351
          vf_explained_var: -0.12619450688362122
          vf_loss: 0.012125117582682934
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,165,3741.5,165000,-3.2004,-2.38,-4.73,320.04


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-10-24_09-10-25
  done: false
  episode_len_mean: 323.63
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.236299999999975
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 2
  episodes_total: 497
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1084121253755357
          entropy_coeff: 0.009999999999999998
          kl: 0.017323485988917517
          policy_loss: -0.07681016756428613
          total_loss: -0.07507695787482792
          vf_explained_var: -0.30217310786247253
          vf_loss: 0.012654921286027982
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,166,3757.58,166000,-3.2363,-2.38,-4.73,323.63


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-10-24_09-10-41
  done: false
  episode_len_mean: 328.51
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.2850999999999737
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 500
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1247998634974161
          entropy_coeff: 0.009999999999999998
          kl: 0.014004837987170523
          policy_loss: 0.045767036908202705
          total_loss: 0.04724530014726851
          vf_explained_var: -0.2255365550518036
          vf_loss: 0.012594967773960282
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,167,3773.6,167000,-3.2851,-2.38,-4.73,328.51


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-10-24_09-10-59
  done: false
  episode_len_mean: 331.32
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3131999999999726
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 2
  episodes_total: 502
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1943215595351324
          entropy_coeff: 0.009999999999999998
          kl: 0.015957402252407445
          policy_loss: -0.09844604664378696
          total_loss: -0.09764003091388279
          vf_explained_var: 0.10566046088933945
          vf_loss: 0.012599627149433622
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 168000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,168,3791.05,168000,-3.3132,-2.38,-4.73,331.32


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-10-24_09-11-19
  done: false
  episode_len_mean: 334.23
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3422999999999727
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 505
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1450693017906612
          entropy_coeff: 0.009999999999999998
          kl: 0.014647245908256945
          policy_loss: -0.002040776610374451
          total_loss: -0.0006210390892293718
          vf_explained_var: 0.029400914907455444
          vf_loss: 0.012733115642590241
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,169,3810.99,169000,-3.3423,-2.38,-4.73,334.23


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-10-24_09-11-40
  done: false
  episode_len_mean: 336.17
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.361699999999973
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 508
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009375
          cur_lr: 5.000000000000001e-05
          entropy: 1.1068587236934238
          entropy_coeff: 0.009999999999999998
          kl: 0.008783706086767514
          policy_loss: -0.09769050396151013
          total_loss: -0.09160402284728156
          vf_explained_var: 0.058070771396160126
          vf_loss: 0.01707271672785282
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,170,3832.54,170000,-3.3617,-2.38,-4.73,336.17




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-10-24_09-12-18
  done: false
  episode_len_mean: 337.88
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.378799999999972
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 511
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009375
          cur_lr: 5.000000000000001e-05
          entropy: 1.078105754322476
          entropy_coeff: 0.009999999999999998
          kl: 0.009597148016299822
          policy_loss: -0.11026834497849146
          total_loss: -0.10507154969705476
          vf_explained_var: 0.0965883657336235
          vf_loss: 0.01588788278814819
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,171,3870.64,171000,-3.3788,-2.38,-4.73,337.88


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-10-24_09-12-40
  done: false
  episode_len_mean: 339.9
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.3989999999999725
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 514
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009375
          cur_lr: 5.000000000000001e-05
          entropy: 1.129743398560418
          entropy_coeff: 0.009999999999999998
          kl: 0.008523363354757072
          policy_loss: -0.12987081706523895
          total_loss: -0.1255031171772215
          vf_explained_var: 0.16782252490520477
          vf_loss: 0.015585227414137787
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,172,3891.65,172000,-3.399,-2.38,-4.73,339.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-10-24_09-13-00
  done: false
  episode_len_mean: 342.54
  episode_media: {}
  episode_reward_max: -2.3799999999999932
  episode_reward_mean: -3.4253999999999714
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 4
  episodes_total: 518
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009375
          cur_lr: 5.000000000000001e-05
          entropy: 1.0535721196068657
          entropy_coeff: 0.009999999999999998
          kl: 0.006894068865573407
          policy_loss: -0.0021386474370956423
          total_loss: 0.0018663708534505633
          vf_explained_var: 0.11321963369846344
          vf_loss: 0.014476104732602834
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 173000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,173,3912.18,173000,-3.4254,-2.38,-4.73,342.54


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-10-24_09-13-22
  done: false
  episode_len_mean: 344.16
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.44159999999997
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 521
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009375
          cur_lr: 5.000000000000001e-05
          entropy: 0.9341247803635068
          entropy_coeff: 0.009999999999999998
          kl: 0.009274818244752936
          policy_loss: 0.06277186671892802
          total_loss: 0.06395288474029964
          vf_explained_var: 0.2069471925497055
          vf_loss: 0.010435315303685558
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 174000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,174,3933.67,174000,-3.4416,-2.4,-4.73,344.16


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-10-24_09-13-45
  done: false
  episode_len_mean: 344.87
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.4486999999999703
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 524
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009375
          cur_lr: 5.000000000000001e-05
          entropy: 0.8011409064133962
          entropy_coeff: 0.009999999999999998
          kl: 0.020109023735754662
          policy_loss: -0.028720146748754713
          total_loss: -0.025941636496120028
          vf_explained_var: 0.012422783300280571
          vf_loss: 0.010601393557671044
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,175,3956.86,175000,-3.4487,-2.4,-4.73,344.87


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-10-24_09-14-08
  done: false
  episode_len_mean: 345.88
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.4587999999999703
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 4
  episodes_total: 528
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 0.933175202873018
          entropy_coeff: 0.009999999999999998
          kl: 0.015485539513695952
          policy_loss: 0.024231924447748396
          total_loss: 0.030496347033315236
          vf_explained_var: 0.05285933241248131
          vf_loss: 0.015378407647626267
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,176,3980.52,176000,-3.4588,-2.4,-4.73,345.88


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-10-24_09-14-32
  done: false
  episode_len_mean: 346.59
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.4658999999999702
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 4
  episodes_total: 532
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 0.9814588824907938
          entropy_coeff: 0.009999999999999998
          kl: 0.014335532831298537
          policy_loss: -0.009994698440035183
          total_loss: -0.004840350730551613
          vf_explained_var: 0.17044450342655182
          vf_loss: 0.014767341688275338
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 17700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,177,4004.4,177000,-3.4659,-2.4,-4.73,346.59


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-10-24_09-14-56
  done: false
  episode_len_mean: 347.18
  episode_media: {}
  episode_reward_max: -2.5199999999999902
  episode_reward_mean: -3.4717999999999694
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 535
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 0.9301949653360578
          entropy_coeff: 0.009999999999999998
          kl: 0.009769652400571142
          policy_loss: 0.008513582994540532
          total_loss: 0.009805248015456729
          vf_explained_var: 0.11278408765792847
          vf_loss: 0.010456228101005157
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 178000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,178,4028.29,178000,-3.4718,-2.52,-4.73,347.18


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-10-24_09-15-21
  done: false
  episode_len_mean: 347.01
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.4700999999999698
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 4
  episodes_total: 539
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.021517910559972
          entropy_coeff: 0.009999999999999998
          kl: 0.005324517866786651
          policy_loss: 0.024644906487729816
          total_loss: 0.027964500586191814
          vf_explained_var: 0.19863519072532654
          vf_loss: 0.013459894019696448
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,179,4052.94,179000,-3.4701,-2.57,-4.73,347.01




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-10-24_09-16-03
  done: false
  episode_len_mean: 346.6
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.465999999999969
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 3
  episodes_total: 542
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.0585189329253302
          entropy_coeff: 0.009999999999999998
          kl: 0.008223995073694389
          policy_loss: -0.10426954668429163
          total_loss: -0.09996529759632217
          vf_explained_var: 0.16978207230567932
          vf_loss: 0.014773788075480196
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,180,4094.86,180000,-3.466,-2.24,-4.73,346.6


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-10-24_09-16-27
  done: false
  episode_len_mean: 345.19
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.4518999999999704
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 4
  episodes_total: 546
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 0.9859861592451732
          entropy_coeff: 0.009999999999999998
          kl: 0.012650307213830184
          policy_loss: -0.011929910878340404
          total_loss: -0.01022275338570277
          vf_explained_var: 0.1417582482099533
          vf_loss: 0.011389124745296107
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 181000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,181,4118.91,181000,-3.4519,-2.24,-4.73,345.19


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-10-24_09-16-52
  done: false
  episode_len_mean: 342.48
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.424799999999971
  episode_reward_min: -4.729999999999944
  episodes_this_iter: 4
  episodes_total: 550
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 0.8422287431028154
          entropy_coeff: 0.009999999999999998
          kl: 0.009954505694026889
          policy_loss: 0.018329594284296036
          total_loss: 0.02554566827085283
          vf_explained_var: 0.08637325465679169
          vf_loss: 0.015498372788230578
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,182,4143.75,182000,-3.4248,-2.24,-4.73,342.48


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-10-24_09-17-18
  done: false
  episode_len_mean: 333.83
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.338299999999972
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 554
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 0.9223314642906189
          entropy_coeff: 0.009999999999999998
          kl: 0.008146634233600973
          policy_loss: 0.01355574598742856
          total_loss: 0.018333371521698102
          vf_explained_var: 0.1962776780128479
          vf_loss: 0.01388637341765894
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,183,4169.76,183000,-3.3383,-2.24,-4.48,333.83


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-10-24_09-17-43
  done: false
  episode_len_mean: 327.64
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.2763999999999744
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 558
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 0.8829015281465319
          entropy_coeff: 0.009999999999999998
          kl: 0.005618343382453933
          policy_loss: 0.03461388159129355
          total_loss: 0.037912382723556626
          vf_explained_var: 0.22612646222114563
          vf_loss: 0.012048506105525627
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 184000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,184,4195.1,184000,-3.2764,-2.24,-4.48,327.64


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-10-24_09-18-07
  done: false
  episode_len_mean: 323.31
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.2330999999999754
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 3
  episodes_total: 561
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.0047313696808284
          entropy_coeff: 0.009999999999999998
          kl: 0.007922497480997019
          policy_loss: -0.11120826734436882
          total_loss: -0.10649751648306846
          vf_explained_var: 0.16241882741451263
          vf_loss: 0.014646653986225526
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 185000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,185,4219.24,185000,-3.2331,-2.24,-4.48,323.31


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-10-24_09-18-30
  done: false
  episode_len_mean: 318.83
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.188299999999976
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 565
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.0941379189491272
          entropy_coeff: 0.009999999999999998
          kl: 0.01162352418585877
          policy_loss: 0.03931584602428807
          total_loss: 0.038880849877993263
          vf_explained_var: 0.2342676818370819
          vf_loss: 0.010342929996032682
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 186000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,186,4241.84,186000,-3.1883,-2.24,-4.48,318.83


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-10-24_09-18-52
  done: false
  episode_len_mean: 316.92
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.169199999999977
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 3
  episodes_total: 568
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014062500000000006
          cur_lr: 5.000000000000001e-05
          entropy: 1.0148552437623342
          entropy_coeff: 0.009999999999999998
          kl: 0.004208825081632502
          policy_loss: -0.08179274996121724
          total_loss: -0.07806623743640052
          vf_explained_var: 0.046128612011671066
          vf_loss: 0.013815875713609987
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,187,4264.29,187000,-3.1692,-2.24,-4.48,316.92




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-10-24_09-19-34
  done: false
  episode_len_mean: 315.05
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.1504999999999774
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 572
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 0.9162542283535003
          entropy_coeff: 0.009999999999999998
          kl: 0.014439785887371236
          policy_loss: 0.011914116309748755
          total_loss: 0.01753151011135843
          vf_explained_var: 0.0393756702542305
          vf_loss: 0.014678407605323527
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 188000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,188,4305.4,188000,-3.1505,-2.24,-4.48,315.05


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-10-24_09-19-58
  done: false
  episode_len_mean: 313.9
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.1389999999999763
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 3
  episodes_total: 575
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007031250000000003
          cur_lr: 5.000000000000001e-05
          entropy: 0.8464949793285794
          entropy_coeff: 0.009999999999999998
          kl: 0.024863771073677692
          policy_loss: -0.11401751645737224
          total_loss: -0.10783302378323344
          vf_explained_var: 0.0664740800857544
          vf_loss: 0.014474618300381634
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,189,4329.58,189000,-3.139,-2.24,-4.48,313.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-10-24_09-20-23
  done: false
  episode_len_mean: 310.81
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.1080999999999777
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 579
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010546874999999997
          cur_lr: 5.000000000000001e-05
          entropy: 0.7504314992162916
          entropy_coeff: 0.009999999999999998
          kl: 0.010479130493711688
          policy_loss: 0.005313595218790902
          total_loss: 0.007898759014076656
          vf_explained_var: 0.23124147951602936
          vf_loss: 0.009978955761632985
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 190000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,190,4354.46,190000,-3.1081,-2.24,-4.48,310.81


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-10-24_09-20-49
  done: false
  episode_len_mean: 304.89
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.048899999999979
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 583
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010546874999999997
          cur_lr: 5.000000000000001e-05
          entropy: 0.6377560648653242
          entropy_coeff: 0.009999999999999998
          kl: 0.0056084278864659805
          policy_loss: -0.039224838299883734
          total_loss: -0.02959747165441513
          vf_explained_var: 0.04889344051480293
          vf_loss: 0.015945776758922472
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 191000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,191,4380.91,191000,-3.0489,-2.24,-4.48,304.89


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-10-24_09-21-16
  done: false
  episode_len_mean: 298.62
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.9861999999999806
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 4
  episodes_total: 587
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010546874999999997
          cur_lr: 5.000000000000001e-05
          entropy: 0.6852959811687469
          entropy_coeff: 0.009999999999999998
          kl: 0.003629261640041174
          policy_loss: -0.1137866158452299
          total_loss: -0.10266065945227941
          vf_explained_var: 0.12166339159011841
          vf_loss: 0.01794063966307375
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 192000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,192,4407.66,192000,-2.9862,-2.24,-4.48,298.62


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-10-24_09-21-42
  done: false
  episode_len_mean: 293.98
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.9397999999999813
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 5
  episodes_total: 592
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.7669538974761962
          entropy_coeff: 0.009999999999999998
          kl: 0.011827676565330354
          policy_loss: -0.005766919172472424
          total_loss: 0.0006660285095373789
          vf_explained_var: 0.3229653537273407
          vf_loss: 0.014040116417325206
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 193000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,193,4434.13,193000,-2.9398,-2.24,-4.48,293.98


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-10-24_09-22-08
  done: false
  episode_len_mean: 290.24
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.902399999999982
  episode_reward_min: -4.479999999999949
  episodes_this_iter: 3
  episodes_total: 595
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.8445833451218075
          entropy_coeff: 0.009999999999999998
          kl: 0.013589703063238584
          policy_loss: -0.11853563611706099
          total_loss: -0.11529850843879912
          vf_explained_var: 0.46765056252479553
          vf_loss: 0.011611295719113615
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,194,4460.11,194000,-2.9024,-2.24,-4.48,290.24




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-10-24_09-22-51
  done: false
  episode_len_mean: 280.73
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.8072999999999837
  episode_reward_min: -4.069999999999958
  episodes_this_iter: 5
  episodes_total: 600
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.8989180624485016
          entropy_coeff: 0.009999999999999998
          kl: 0.016881465805045022
          policy_loss: -0.004981250067551931
          total_loss: -0.004279740610056453
          vf_explained_var: 0.6842136383056641
          vf_loss: 0.009601668014915453
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 195000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,195,4503.18,195000,-2.8073,-2.24,-4.07,280.73


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-10-24_09-23-16
  done: false
  episode_len_mean: 277.52
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.775199999999984
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 3
  episodes_total: 603
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 1.10825903084543
          entropy_coeff: 0.009999999999999998
          kl: 0.009756927087541101
          policy_loss: 0.04261741240819295
          total_loss: 0.04436029709047741
          vf_explained_var: 0.414367139339447
          vf_loss: 0.012774021918368008
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,196,4527.92,196000,-2.7752,-2.24,-3.59,277.52


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-10-24_09-23-39
  done: false
  episode_len_mean: 276.01
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7600999999999853
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 3
  episodes_total: 606
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.9501050843132867
          entropy_coeff: 0.009999999999999998
          kl: 0.015163061417526534
          policy_loss: -0.0771956322921647
          total_loss: -0.0722233562833733
          vf_explained_var: 0.3127531409263611
          vf_loss: 0.014393361409505208
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 197000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,197,4550.58,197000,-2.7601,-2.24,-3.59,276.01


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-10-24_09-24-02
  done: false
  episode_len_mean: 275.12
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7511999999999848
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 610
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.9246089140574137
          entropy_coeff: 0.009999999999999998
          kl: 0.007357286029849784
          policy_loss: 0.027601581977473366
          total_loss: 0.03191153564386898
          vf_explained_var: 0.25249236822128296
          vf_loss: 0.013517245246718327
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 198000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,198,4573.71,198000,-2.7512,-2.24,-3.59,275.12


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-10-24_09-24-26
  done: false
  episode_len_mean: 273.44
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7343999999999857
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 3
  episodes_total: 613
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.8579705191983117
          entropy_coeff: 0.009999999999999998
          kl: 0.011643961659285138
          policy_loss: -0.1258517359693845
          total_loss: -0.12189288031723765
          vf_explained_var: 0.28446245193481445
          vf_loss: 0.012477155557523172
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,199,4597.3,199000,-2.7344,-2.24,-3.36,273.44


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-10-24_09-24-48
  done: false
  episode_len_mean: 272.15
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7214999999999856
  episode_reward_min: -3.189999999999976
  episodes_this_iter: 4
  episodes_total: 617
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.8498096982638041
          entropy_coeff: 0.009999999999999998
          kl: 0.005471120873725965
          policy_loss: 0.0659836181335979
          total_loss: 0.0694425602753957
          vf_explained_var: 0.1635911911725998
          vf_loss: 0.011928186694987946
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,200,4619.62,200000,-2.7215,-2.24,-3.19,272.15


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-10-24_09-25-12
  done: false
  episode_len_mean: 270.8
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7079999999999864
  episode_reward_min: -3.089999999999978
  episodes_this_iter: 4
  episodes_total: 621
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.829505189259847
          entropy_coeff: 0.009999999999999998
          kl: 0.009065927012292087
          policy_loss: -0.013289888492888875
          total_loss: -0.007592696075638135
          vf_explained_var: 0.12677377462387085
          vf_loss: 0.013944433732993073
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,201,4643.63,201000,-2.708,-2.24,-3.09,270.8


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-10-24_09-25-34
  done: false
  episode_len_mean: 270.93
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.709299999999986
  episode_reward_min: -3.089999999999978
  episodes_this_iter: 3
  episodes_total: 624
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.7613679548104604
          entropy_coeff: 0.009999999999999998
          kl: 0.00709662629457368
          policy_loss: 0.05254074219200346
          total_loss: 0.055639647609657714
          vf_explained_var: 0.03896815702319145
          vf_loss: 0.010675159146517722
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,202,4665.52,202000,-2.7093,-2.24,-3.09,270.93


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-10-24_09-25-58
  done: false
  episode_len_mean: 271.31
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7130999999999865
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 3
  episodes_total: 627
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.6776842289500766
          entropy_coeff: 0.009999999999999998
          kl: 0.010076159427301345
          policy_loss: -0.11231108986669117
          total_loss: -0.1046509182287587
          vf_explained_var: 0.1250460147857666
          vf_loss: 0.014383881818503141
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,203,4689.38,203000,-2.7131,-2.24,-3.12,271.31




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-10-24_09-26-40
  done: false
  episode_len_mean: 270.85
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.708499999999986
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 631
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.6987946344746484
          entropy_coeff: 0.009999999999999998
          kl: 0.005109417044155314
          policy_loss: -0.053615400112337536
          total_loss: -0.04686129308409161
          vf_explained_var: 0.02957269363105297
          vf_loss: 0.013715107840188366
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 204000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,204,4731.83,204000,-2.7085,-2.24,-3.12,270.85


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-10-24_09-27-04
  done: false
  episode_len_mean: 270.69
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.706899999999986
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 4
  episodes_total: 635
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005273437499999999
          cur_lr: 5.000000000000001e-05
          entropy: 0.682581326034334
          entropy_coeff: 0.009999999999999998
          kl: 0.004504878386422975
          policy_loss: 0.02998970929119322
          total_loss: 0.036786277012692555
          vf_explained_var: 0.16441980004310608
          vf_loss: 0.01359862731769681
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,205,4755.87,205000,-2.7069,-2.24,-3.12,270.69


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-10-24_09-27-27
  done: false
  episode_len_mean: 271.54
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7153999999999865
  episode_reward_min: -3.1199999999999775
  episodes_this_iter: 3
  episodes_total: 638
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0026367187499999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.7162509825494554
          entropy_coeff: 0.009999999999999998
          kl: 0.011571867532039743
          policy_loss: -0.07549912598397997
          total_loss: -0.06895272955298423
          vf_explained_var: 0.03502770885825157
          vf_loss: 0.013678392654077874
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 20600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,206,4778.87,206000,-2.7154,-2.24,-3.12,271.54


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-10-24_09-27-49
  done: false
  episode_len_mean: 272.23
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.722299999999986
  episode_reward_min: -3.1999999999999758
  episodes_this_iter: 4
  episodes_total: 642
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0026367187499999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.6910538600550757
          entropy_coeff: 0.009999999999999998
          kl: 0.006478074073574507
          policy_loss: 0.0429423976275656
          total_loss: 0.050013127426306404
          vf_explained_var: 0.06768281012773514
          vf_loss: 0.0139641883265641
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,207,4800.93,207000,-2.7223,-2.25,-3.2,272.23


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-10-24_09-28-11
  done: false
  episode_len_mean: 273.25
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.732499999999986
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 645
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0026367187499999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.7176627006795672
          entropy_coeff: 0.009999999999999998
          kl: 0.006646126543123524
          policy_loss: 0.048467638840277986
          total_loss: 0.0530634885860814
          vf_explained_var: -0.020498132333159447
          vf_loss: 0.011754947605853279
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,208,4822.58,208000,-2.7325,-2.25,-3.33,273.25


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-10-24_09-28-33
  done: false
  episode_len_mean: 274.59
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.7458999999999856
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 648
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0026367187499999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.7317285378774007
          entropy_coeff: 0.009999999999999998
          kl: 0.007541014228045414
          policy_loss: -0.03608293450540966
          total_loss: -0.030687562459044988
          vf_explained_var: -0.21096216142177582
          vf_loss: 0.012692767858000782
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 2090

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,209,4844.01,209000,-2.7459,-2.25,-3.33,274.59


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-10-24_09-28-56
  done: false
  episode_len_mean: 275.59
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.755899999999985
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 652
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0026367187499999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.6845016512605879
          entropy_coeff: 0.009999999999999998
          kl: 0.010847869576323117
          policy_loss: 0.016808301624324586
          total_loss: 0.026965695205661985
          vf_explained_var: 0.06163075193762779
          vf_loss: 0.016973805696600012
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 210000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,210,4867.26,210000,-2.7559,-2.25,-3.33,275.59


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-10-24_09-29-18
  done: false
  episode_len_mean: 277.02
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.770199999999985
  episode_reward_min: -3.369999999999972
  episodes_this_iter: 3
  episodes_total: 655
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0026367187499999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.6990063144101037
          entropy_coeff: 0.009999999999999998
          kl: 0.014131637589043362
          policy_loss: 0.04210004260142644
          total_loss: 0.04843143762813674
          vf_explained_var: 0.26493602991104126
          vf_loss: 0.013284197908392848
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 211000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,211,4889.53,211000,-2.7702,-2.25,-3.37,277.02


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-10-24_09-29-41
  done: false
  episode_len_mean: 278.29
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.7828999999999837
  episode_reward_min: -3.369999999999972
  episodes_this_iter: 3
  episodes_total: 658
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0026367187499999993
          cur_lr: 5.000000000000001e-05
          entropy: 0.7907501313421461
          entropy_coeff: 0.009999999999999998
          kl: 0.021872112006661205
          policy_loss: -0.10596111855573125
          total_loss: -0.09594203498628405
          vf_explained_var: 0.09261095523834229
          vf_loss: 0.017868916918006208
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 212000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,212,4912.6,212000,-2.7829,-2.25,-3.37,278.29




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-10-24_09-30-19
  done: false
  episode_len_mean: 279.51
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.7950999999999833
  episode_reward_min: -3.369999999999972
  episodes_this_iter: 4
  episodes_total: 662
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003955078125
          cur_lr: 5.000000000000001e-05
          entropy: 0.8467236002286275
          entropy_coeff: 0.009999999999999998
          kl: 0.006966149776747501
          policy_loss: 0.027483062280548944
          total_loss: 0.03572603455848164
          vf_explained_var: 0.10971072316169739
          vf_loss: 0.016682655674715836
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 213000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,213,4950.73,213000,-2.7951,-2.25,-3.37,279.51


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-10-24_09-30-39
  done: false
  episode_len_mean: 280.41
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.8040999999999836
  episode_reward_min: -3.45999999999997
  episodes_this_iter: 2
  episodes_total: 664
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003955078125
          cur_lr: 5.000000000000001e-05
          entropy: 0.8373099956247542
          entropy_coeff: 0.009999999999999998
          kl: 0.010538531816393614
          policy_loss: -0.08154165397087733
          total_loss: -0.0781905750433604
          vf_explained_var: 0.049423884600400925
          vf_loss: 0.011682500873899295
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,214,4970.32,214000,-2.8041,-2.25,-3.46,280.41


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-10-24_09-31-00
  done: false
  episode_len_mean: 282.24
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.822399999999984
  episode_reward_min: -3.7699999999999636
  episodes_this_iter: 3
  episodes_total: 667
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003955078125
          cur_lr: 5.000000000000001e-05
          entropy: 0.8711178229914771
          entropy_coeff: 0.009999999999999998
          kl: 0.012322433145316225
          policy_loss: -0.10331842228770256
          total_loss: -0.09743738853269153
          vf_explained_var: 0.10763037949800491
          vf_loss: 0.014543475976420774
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 215000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,215,4991.11,215000,-2.8224,-2.25,-3.77,282.24


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-10-24_09-31-19
  done: false
  episode_len_mean: 284.74
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.847399999999983
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 670
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003955078125
          cur_lr: 5.000000000000001e-05
          entropy: 0.8560397969351874
          entropy_coeff: 0.009999999999999998
          kl: 0.00438354300100245
          policy_loss: -0.02011920909086863
          total_loss: -0.016134812848435508
          vf_explained_var: 0.11540555953979492
          vf_loss: 0.01252745538432565
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 216000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,216,5010.29,216000,-2.8474,-2.25,-3.85,284.74


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-10-24_09-31-38
  done: false
  episode_len_mean: 286.6
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.865999999999983
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 673
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0019775390625
          cur_lr: 5.000000000000001e-05
          entropy: 0.9173774116569096
          entropy_coeff: 0.009999999999999998
          kl: 0.01186969895987065
          policy_loss: 0.03649543250600497
          total_loss: 0.03948986033598582
          vf_explained_var: 0.19331346452236176
          vf_loss: 0.012144725153403771
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 217000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,217,5029.22,217000,-2.866,-2.25,-3.85,286.6


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-10-24_09-31-56
  done: false
  episode_len_mean: 289.52
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.8951999999999822
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 3
  episodes_total: 676
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0019775390625
          cur_lr: 5.000000000000001e-05
          entropy: 0.8454334563679166
          entropy_coeff: 0.009999999999999998
          kl: 0.005252527090567859
          policy_loss: 0.056204361385769314
          total_loss: 0.05965979082716836
          vf_explained_var: -0.11869246512651443
          vf_loss: 0.011899376235346103
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,218,5047.15,218000,-2.8952,-2.25,-4,289.52


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-10-24_09-32-16
  done: false
  episode_len_mean: 292.03
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.9202999999999815
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 3
  episodes_total: 679
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0019775390625
          cur_lr: 5.000000000000001e-05
          entropy: 0.794838007291158
          entropy_coeff: 0.009999999999999998
          kl: 0.01126811290230856
          policy_loss: 0.03195317544870906
          total_loss: 0.03597019770079189
          vf_explained_var: -0.00602182699367404
          vf_loss: 0.011943119010538794
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,219,5066.84,219000,-2.9203,-2.25,-4,292.03


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-10-24_09-32-38
  done: false
  episode_len_mean: 293.29
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.932899999999981
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 3
  episodes_total: 682
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0019775390625
          cur_lr: 5.000000000000001e-05
          entropy: 0.7598089834054311
          entropy_coeff: 0.009999999999999998
          kl: 0.006962395494817386
          policy_loss: 0.04727683671646648
          total_loss: 0.05124665722250939
          vf_explained_var: 0.0705261155962944
          vf_loss: 0.011554141420250137
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 220000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,220,5089.44,220000,-2.9329,-2.25,-4,293.29


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-10-24_09-33-02
  done: false
  episode_len_mean: 294.32
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.9431999999999814
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 4
  episodes_total: 686
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0019775390625
          cur_lr: 5.000000000000001e-05
          entropy: 0.6714432272646162
          entropy_coeff: 0.009999999999999998
          kl: 0.008032189131959161
          policy_loss: 0.022115484873453776
          total_loss: 0.03069432477156321
          vf_explained_var: 0.08153916150331497
          vf_loss: 0.015277387325962384
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,221,5113.39,221000,-2.9432,-2.25,-4,294.32


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-10-24_09-33-26
  done: false
  episode_len_mean: 295.49
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.9548999999999803
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 3
  episodes_total: 689
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0019775390625
          cur_lr: 5.000000000000001e-05
          entropy: 0.865153306722641
          entropy_coeff: 0.009999999999999998
          kl: 0.025922558279099988
          policy_loss: 0.02618957700000869
          total_loss: 0.028132074657413693
          vf_explained_var: 0.23936070501804352
          vf_loss: 0.010542765187306537
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,222,5136.77,222000,-2.9549,-2.25,-4,295.49




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-10-24_09-34-07
  done: false
  episode_len_mean: 296.6
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.965999999999981
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 4
  episodes_total: 693
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0029663085937500014
          cur_lr: 5.000000000000001e-05
          entropy: 1.0465031663576763
          entropy_coeff: 0.009999999999999998
          kl: 0.013567809773372849
          policy_loss: 0.013674607459041808
          total_loss: 0.02038939976029926
          vf_explained_var: 0.09890877455472946
          vf_loss: 0.017139574461099174
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,223,5177.85,223000,-2.966,-2.25,-4,296.6


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-10-24_09-34-28
  done: false
  episode_len_mean: 298.46
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.9845999999999804
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 3
  episodes_total: 696
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0029663085937500014
          cur_lr: 5.000000000000001e-05
          entropy: 1.1139583792951373
          entropy_coeff: 0.009999999999999998
          kl: 0.012125235852736003
          policy_loss: 0.051551187617911234
          total_loss: 0.05120601322915819
          vf_explained_var: 0.23145675659179688
          vf_loss: 0.010758443555742916
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,224,5199.62,224000,-2.9846,-2.25,-4,298.46


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-10-24_09-34-51
  done: false
  episode_len_mean: 299.76
  episode_media: {}
  episode_reward_max: -2.249999999999996
  episode_reward_mean: -2.9975999999999807
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 3
  episodes_total: 699
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0029663085937500014
          cur_lr: 5.000000000000001e-05
          entropy: 1.051825280321969
          entropy_coeff: 0.009999999999999998
          kl: 0.00711656880285188
          policy_loss: -0.11383669715788629
          total_loss: -0.10968500384026103
          vf_explained_var: 0.3259303867816925
          vf_loss: 0.014648836799379852
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,225,5222.39,225000,-2.9976,-2.25,-4,299.76


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-10-24_09-35-13
  done: false
  episode_len_mean: 301.11
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -3.011099999999979
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 4
  episodes_total: 703
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0029663085937500014
          cur_lr: 5.000000000000001e-05
          entropy: 1.1668240931298999
          entropy_coeff: 0.009999999999999998
          kl: 0.010260898440246614
          policy_loss: -0.0073449584345022835
          total_loss: -0.0018942617707782322
          vf_explained_var: 0.13060230016708374
          vf_loss: 0.01708850052414669
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,226,5243.7,226000,-3.0111,-2.43,-4,301.11


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-10-24_09-35-34
  done: false
  episode_len_mean: 301.42
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -3.0141999999999793
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 3
  episodes_total: 706
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0029663085937500014
          cur_lr: 5.000000000000001e-05
          entropy: 1.0463072233729893
          entropy_coeff: 0.009999999999999998
          kl: 0.007145120186509166
          policy_loss: 0.05496701614724265
          total_loss: 0.05514810383319855
          vf_explained_var: 0.1783415973186493
          vf_loss: 0.010622963443812397
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,227,5265.31,227000,-3.0142,-2.43,-4,301.42


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-10-24_09-35-58
  done: false
  episode_len_mean: 301.25
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -3.0124999999999806
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 3
  episodes_total: 709
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0029663085937500014
          cur_lr: 5.000000000000001e-05
          entropy: 0.8944700645075904
          entropy_coeff: 0.009999999999999998
          kl: 0.00478118498709341
          policy_loss: -0.1068779844376776
          total_loss: -0.10233834832906723
          vf_explained_var: 0.34625011682510376
          vf_loss: 0.013470151906626092
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 228000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,228,5289.31,228000,-3.0125,-2.43,-4,301.25


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-10-24_09-36-20
  done: false
  episode_len_mean: 302.08
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -3.0207999999999795
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 4
  episodes_total: 713
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0014831542968750007
          cur_lr: 5.000000000000001e-05
          entropy: 1.0449641585350036
          entropy_coeff: 0.009999999999999998
          kl: 0.009914869077241721
          policy_loss: 0.023552653441826502
          total_loss: 0.029945331522160105
          vf_explained_var: 0.10308325290679932
          vf_loss: 0.016827611604498492
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 22900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,229,5310.89,229000,-3.0208,-2.43,-4,302.08


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-10-24_09-36-44
  done: false
  episode_len_mean: 301.36
  episode_media: {}
  episode_reward_max: -2.429999999999992
  episode_reward_mean: -3.013599999999979
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 3
  episodes_total: 716
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0014831542968750007
          cur_lr: 5.000000000000001e-05
          entropy: 0.9005431287818485
          entropy_coeff: 0.009999999999999998
          kl: 0.010466645589409908
          policy_loss: -0.08888913740714391
          total_loss: -0.08283815922008621
          vf_explained_var: 0.193014457821846
          vf_loss: 0.015040888337211476
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,230,5335.22,230000,-3.0136,-2.43,-4,301.36




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-10-24_09-37-24
  done: false
  episode_len_mean: 301.25
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.012499999999979
  episode_reward_min: -3.9999999999999587
  episodes_this_iter: 4
  episodes_total: 720
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0014831542968750007
          cur_lr: 5.000000000000001e-05
          entropy: 0.9496441417270236
          entropy_coeff: 0.009999999999999998
          kl: 0.004236021614676686
          policy_loss: 0.022161853189269703
          total_loss: 0.026823666733172206
          vf_explained_var: 0.2622843086719513
          vf_loss: 0.014151972045914995
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 231000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,231,5375.01,231000,-3.0125,-2.28,-4,301.25


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-10-24_09-37-43
  done: false
  episode_len_mean: 303.76
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.037599999999979
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 723
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007415771484375003
          cur_lr: 5.000000000000001e-05
          entropy: 1.318018811278873
          entropy_coeff: 0.009999999999999998
          kl: 0.019093074687854886
          policy_loss: 0.023116533706585566
          total_loss: 0.023925929516553878
          vf_explained_var: 0.02782437577843666
          vf_loss: 0.01397542329505086
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,232,5394.22,232000,-3.0376,-2.28,-4.32,303.76


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-10-24_09-38-10
  done: false
  episode_len_mean: 302.33
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.0232999999999794
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 4
  episodes_total: 727
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007415771484375003
          cur_lr: 5.000000000000001e-05
          entropy: 0.917121003733741
          entropy_coeff: 0.009999999999999998
          kl: 0.009341571668987332
          policy_loss: 0.02526189105378257
          total_loss: 0.028866763330168195
          vf_explained_var: 0.3030169904232025
          vf_loss: 0.012769153429609206
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 233000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,233,5420.47,233000,-3.0233,-2.28,-4.32,302.33


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-10-24_09-38-33
  done: false
  episode_len_mean: 302.94
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.0293999999999794
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 730
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007415771484375003
          cur_lr: 5.000000000000001e-05
          entropy: 0.9714514520433214
          entropy_coeff: 0.009999999999999998
          kl: 0.007782934387197571
          policy_loss: -0.05849104109737608
          total_loss: -0.0566282225979699
          vf_explained_var: 0.3061445951461792
          vf_loss: 0.011571557596067174
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 234000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,234,5444.37,234000,-3.0294,-2.28,-4.32,302.94


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-10-24_09-38-58
  done: false
  episode_len_mean: 302.66
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.026599999999979
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 4
  episodes_total: 734
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007415771484375003
          cur_lr: 5.000000000000001e-05
          entropy: 0.9595751040511661
          entropy_coeff: 0.009999999999999998
          kl: 0.007814270756326517
          policy_loss: 0.021541195611159008
          total_loss: 0.028739330834812588
          vf_explained_var: 0.09950647503137589
          vf_loss: 0.016788093207610977
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,235,5469.04,235000,-3.0266,-2.28,-4.32,302.66


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-10-24_09-39-21
  done: false
  episode_len_mean: 302.35
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.0234999999999785
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 737
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007415771484375003
          cur_lr: 5.000000000000001e-05
          entropy: 1.0945857054657406
          entropy_coeff: 0.009999999999999998
          kl: 0.010297430134883238
          policy_loss: -0.10875963850153816
          total_loss: -0.1029748490287198
          vf_explained_var: 0.17495915293693542
          vf_loss: 0.016723012261920505
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 236000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,236,5492.17,236000,-3.0235,-2.28,-4.32,302.35


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-10-24_09-39-44
  done: false
  episode_len_mean: 303.01
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.0300999999999796
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 4
  episodes_total: 741
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007415771484375003
          cur_lr: 5.000000000000001e-05
          entropy: 1.1138958851496379
          entropy_coeff: 0.009999999999999998
          kl: 0.010943999704523696
          policy_loss: 0.03087330808242162
          total_loss: 0.03495210972097185
          vf_explained_var: 0.23733194172382355
          vf_loss: 0.015209642890840768
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 237000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,237,5514.78,237000,-3.0301,-2.28,-4.32,303.01


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-10-24_09-40-09
  done: false
  episode_len_mean: 301.04
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.0103999999999793
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 4
  episodes_total: 745
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007415771484375003
          cur_lr: 5.000000000000001e-05
          entropy: 0.9651622454325358
          entropy_coeff: 0.009999999999999998
          kl: 0.010711594292686736
          policy_loss: -0.007993876644306713
          total_loss: -0.0030177159855763118
          vf_explained_var: 0.26331302523612976
          vf_loss: 0.014619839325961139
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 23

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,238,5540.21,238000,-3.0104,-2.28,-4.32,301.04


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-10-24_09-40-35
  done: false
  episode_len_mean: 298.94
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.9893999999999803
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 4
  episodes_total: 749
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007415771484375003
          cur_lr: 5.000000000000001e-05
          entropy: 0.8688527180088891
          entropy_coeff: 0.009999999999999998
          kl: 0.010135313193396951
          policy_loss: -0.008780756427182092
          total_loss: -0.0038496378395292494
          vf_explained_var: 0.23525837063789368
          vf_loss: 0.013612126745283604
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 23

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,239,5565.98,239000,-2.9894,-2.28,-4.32,298.94




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-10-24_09-41-20
  done: false
  episode_len_mean: 297.23
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.9722999999999815
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 4
  episodes_total: 753
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007415771484375003
          cur_lr: 5.000000000000001e-05
          entropy: 0.8688815765910678
          entropy_coeff: 0.009999999999999998
          kl: 0.004364293361486836
          policy_loss: 0.01431306724747022
          total_loss: 0.018395683417717617
          vf_explained_var: 0.2938463091850281
          vf_loss: 0.012768191554480128
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,240,5610.35,240000,-2.9723,-2.16,-4.32,297.23


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-10-24_09-41-46
  done: false
  episode_len_mean: 294.91
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.949099999999981
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 4
  episodes_total: 757
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003707885742187502
          cur_lr: 5.000000000000001e-05
          entropy: 0.8778884960545434
          entropy_coeff: 0.009999999999999998
          kl: 0.006433959107182395
          policy_loss: 0.0202686725391282
          total_loss: 0.022322445528374778
          vf_explained_var: 0.41677623987197876
          vf_loss: 0.010830272930777735
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 241000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,241,5636.33,241000,-2.9491,-2.16,-4.32,294.91


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-10-24_09-42-11
  done: false
  episode_len_mean: 294.35
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.9434999999999807
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 760
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003707885742187502
          cur_lr: 5.000000000000001e-05
          entropy: 0.9111018697420756
          entropy_coeff: 0.009999999999999998
          kl: 0.005953247325678982
          policy_loss: -0.11483190688822005
          total_loss: -0.11114660220013725
          vf_explained_var: 0.3644959032535553
          vf_loss: 0.012794114721732006
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 242000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,242,5661.47,242000,-2.9435,-2.16,-4.32,294.35


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-10-24_09-42-34
  done: false
  episode_len_mean: 292.02
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.9201999999999813
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 4
  episodes_total: 764
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003707885742187502
          cur_lr: 5.000000000000001e-05
          entropy: 0.9890987416108449
          entropy_coeff: 0.009999999999999998
          kl: 0.013159130620975448
          policy_loss: 0.007187066309981876
          total_loss: 0.011968860940800772
          vf_explained_var: 0.2298000603914261
          vf_loss: 0.014667902203897635
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 243000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,243,5685.25,243000,-2.9202,-2.16,-4.32,292.02


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-10-24_09-42-59
  done: false
  episode_len_mean: 288.67
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.8866999999999825
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 4
  episodes_total: 768
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003707885742187502
          cur_lr: 5.000000000000001e-05
          entropy: 1.022154227230284
          entropy_coeff: 0.009999999999999998
          kl: 0.005311144679796974
          policy_loss: 0.025549709051847457
          total_loss: 0.02799701831407017
          vf_explained_var: 0.36400240659713745
          vf_loss: 0.012666882088200913
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 244000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,244,5709.65,244000,-2.8867,-2.16,-4.32,288.67


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-10-24_09-43-23
  done: false
  episode_len_mean: 286.46
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.864599999999983
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 771
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003707885742187502
          cur_lr: 5.000000000000001e-05
          entropy: 1.0523032936784955
          entropy_coeff: 0.009999999999999998
          kl: 0.005623462271266138
          policy_loss: -0.10335296193758646
          total_loss: -0.10280194133520126
          vf_explained_var: 0.4670696556568146
          vf_loss: 0.011071967457731565
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 245000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,245,5733.8,245000,-2.8646,-2.16,-4.32,286.46


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-10-24_09-43-46
  done: false
  episode_len_mean: 283.26
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.8325999999999834
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 4
  episodes_total: 775
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003707885742187502
          cur_lr: 5.000000000000001e-05
          entropy: 1.151618892616696
          entropy_coeff: 0.009999999999999998
          kl: 0.008920118859561418
          policy_loss: 0.023039223957392906
          total_loss: 0.027515455956260364
          vf_explained_var: 0.13099405169487
          vf_loss: 0.015989114613168768
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained: 246000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,246,5756.51,246000,-2.8326,-2.16,-4.32,283.26


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-10-24_09-44-08
  done: false
  episode_len_mean: 281.39
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.813899999999984
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 778
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003707885742187502
          cur_lr: 5.000000000000001e-05
          entropy: 1.2422691371705796
          entropy_coeff: 0.009999999999999998
          kl: 0.019502585076028863
          policy_loss: -0.01106519897778829
          total_loss: -0.012944994866847992
          vf_explained_var: 0.06014329940080643
          vf_loss: 0.010535661157983768
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 247000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,247,5778.86,247000,-2.8139,-2.16,-4.32,281.39




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-10-24_09-44-47
  done: false
  episode_len_mean: 281.56
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.815599999999984
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 781
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003707885742187502
          cur_lr: 5.000000000000001e-05
          entropy: 1.1247439278496636
          entropy_coeff: 0.009999999999999998
          kl: 0.012931507333363177
          policy_loss: -0.1214686657819483
          total_loss: -0.11802820050054126
          vf_explained_var: 0.13676996529102325
          vf_loss: 0.014683111425903108
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 248000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,248,5818.11,248000,-2.8156,-2.16,-4.32,281.56


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-10-24_09-45-10
  done: false
  episode_len_mean: 282.82
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.828199999999983
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 4
  episodes_total: 785
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003707885742187502
          cur_lr: 5.000000000000001e-05
          entropy: 1.1387056959999933
          entropy_coeff: 0.009999999999999998
          kl: 0.008360248631903665
          policy_loss: 0.008479065696398417
          total_loss: 0.012173971864912245
          vf_explained_var: 0.086151123046875
          vf_loss: 0.015078864557047685
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 249000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,249,5840.19,249000,-2.8282,-2.16,-4.32,282.82


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-10-24_09-45-30
  done: false
  episode_len_mean: 283.92
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.8391999999999835
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 788
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003707885742187502
          cur_lr: 5.000000000000001e-05
          entropy: 1.0970475713411967
          entropy_coeff: 0.009999999999999998
          kl: 0.012403026973361066
          policy_loss: 0.04671477542983161
          total_loss: 0.04655041429731581
          vf_explained_var: 0.17448478937149048
          vf_loss: 0.010801515227972737
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,250,5861.04,250000,-2.8392,-2.16,-4.32,283.92


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-10-24_09-45-50
  done: false
  episode_len_mean: 286.15
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.8614999999999826
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 791
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003707885742187502
          cur_lr: 5.000000000000001e-05
          entropy: 1.0135544505384233
          entropy_coeff: 0.009999999999999998
          kl: 0.009569666806360051
          policy_loss: 0.04882367716895209
          total_loss: 0.050283071067598134
          vf_explained_var: 0.04919978603720665
          vf_loss: 0.011591389754580126
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained: 251000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,251,5880.41,251000,-2.8615,-2.16,-4.32,286.15


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-10-24_09-46-11
  done: false
  episode_len_mean: 286.37
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.863699999999983
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 794
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003707885742187502
          cur_lr: 5.000000000000001e-05
          entropy: 0.9759808679421743
          entropy_coeff: 0.009999999999999998
          kl: 0.02123327687651487
          policy_loss: 0.04835246495074696
          total_loss: 0.048560577962133616
          vf_explained_var: 0.22647033631801605
          vf_loss: 0.00996004802097256
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trained: 252000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,252,5902.02,252000,-2.8637,-2.16,-4.32,286.37


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-10-24_09-46-34
  done: false
  episode_len_mean: 285.92
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.8591999999999826
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 797
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005561828613281248
          cur_lr: 5.000000000000001e-05
          entropy: 0.927892064385944
          entropy_coeff: 0.009999999999999998
          kl: 0.011878543236801894
          policy_loss: -0.0854082149763902
          total_loss: -0.07691190549068981
          vf_explained_var: 0.03923255205154419
          vf_loss: 0.017768619685537286
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained: 253000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,253,5924.83,253000,-2.8592,-2.16,-4.32,285.92


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-10-24_09-46-56
  done: false
  episode_len_mean: 285.91
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.8590999999999824
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 4
  episodes_total: 801
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005561828613281248
          cur_lr: 5.000000000000001e-05
          entropy: 1.0703293681144714
          entropy_coeff: 0.009999999999999998
          kl: 0.013473541458033762
          policy_loss: -0.01524277784758144
          total_loss: -0.00914615285065439
          vf_explained_var: 0.12239428609609604
          vf_loss: 0.016792421436144248
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained: 254000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,254,5946.64,254000,-2.8591,-2.16,-4.32,285.91


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-10-24_09-47-17
  done: false
  episode_len_mean: 286.56
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.8655999999999824
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 804
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005561828613281248
          cur_lr: 5.000000000000001e-05
          entropy: 1.0757480336560143
          entropy_coeff: 0.009999999999999998
          kl: 0.018382704987050445
          policy_loss: 0.0778891576661004
          total_loss: 0.075915218061871
          vf_explained_var: -0.017648398876190186
          vf_loss: 0.008773318711124982
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 255000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,255,5967.59,255000,-2.8656,-2.16,-4.32,286.56


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-10-24_09-47-38
  done: false
  episode_len_mean: 287.48
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.874799999999983
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 807
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005561828613281248
          cur_lr: 5.000000000000001e-05
          entropy: 1.1161467141575283
          entropy_coeff: 0.009999999999999998
          kl: 0.00675511649123173
          policy_loss: 0.04559264837039841
          total_loss: 0.04702669348981645
          vf_explained_var: -0.026793548837304115
          vf_loss: 0.012591752911814386
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 256000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,256,5988.43,256000,-2.8748,-2.16,-4.32,287.48




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-10-24_09-48-16
  done: false
  episode_len_mean: 288.15
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.881499999999982
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 810
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005561828613281248
          cur_lr: 5.000000000000001e-05
          entropy: 1.2101754585901896
          entropy_coeff: 0.009999999999999998
          kl: 0.010217802873360743
          policy_loss: 0.028012228508790333
          total_loss: 0.027157134314378103
          vf_explained_var: 0.21418631076812744
          vf_loss: 0.011240978110840337
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 257000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,257,6026.69,257000,-2.8815,-2.16,-4.32,288.15


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-10-24_09-48-37
  done: false
  episode_len_mean: 288.71
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.887099999999983
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 813
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005561828613281248
          cur_lr: 5.000000000000001e-05
          entropy: 1.215063864654965
          entropy_coeff: 0.009999999999999998
          kl: 0.008813564852678842
          policy_loss: -0.042052259047826133
          total_loss: -0.04255525817473729
          vf_explained_var: 0.28571778535842896
          vf_loss: 0.011642735999905401
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained: 258000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,258,6047.5,258000,-2.8871,-2.16,-4.32,288.71


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-10-24_09-48-56
  done: false
  episode_len_mean: 291.2
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.9119999999999817
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 816
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005561828613281248
          cur_lr: 5.000000000000001e-05
          entropy: 1.3660906341340806
          entropy_coeff: 0.009999999999999998
          kl: 0.012330511643732302
          policy_loss: 0.039598216861486436
          total_loss: 0.03896393221285608
          vf_explained_var: 0.044322460889816284
          vf_loss: 0.013019763305783272
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_trained: 259000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,259,6066.7,259000,-2.912,-2.16,-4.32,291.2


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-10-24_09-49-18
  done: false
  episode_len_mean: 292.22
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.922199999999981
  episode_reward_min: -4.319999999999952
  episodes_this_iter: 3
  episodes_total: 819
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005561828613281248
          cur_lr: 5.000000000000001e-05
          entropy: 1.119860772954093
          entropy_coeff: 0.009999999999999998
          kl: 0.00857860451110876
          policy_loss: 0.045173606193727914
          total_loss: 0.046066748268074456
          vf_explained_var: -0.21173135936260223
          vf_loss: 0.012086976580192439
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 260000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,260,6088.41,260000,-2.9222,-2.16,-4.32,292.22


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-10-24_09-49-40
  done: false
  episode_len_mean: 291.27
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.912699999999981
  episode_reward_min: -3.6699999999999657
  episodes_this_iter: 3
  episodes_total: 822
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005561828613281248
          cur_lr: 5.000000000000001e-05
          entropy: 1.2765749904844497
          entropy_coeff: 0.009999999999999998
          kl: 0.00841866994010223
          policy_loss: -0.015924158526791465
          total_loss: -0.01610975277920564
          vf_explained_var: 0.22520260512828827
          vf_loss: 0.012575476119915644
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 261000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,261,6109.92,261000,-2.9127,-2.16,-3.67,291.27


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-10-24_09-50-01
  done: false
  episode_len_mean: 292.66
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.9265999999999814
  episode_reward_min: -3.6699999999999657
  episodes_this_iter: 3
  episodes_total: 825
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005561828613281248
          cur_lr: 5.000000000000001e-05
          entropy: 1.4066333916452196
          entropy_coeff: 0.009999999999999998
          kl: 0.03550715946047427
          policy_loss: -0.09448992104993927
          total_loss: -0.09176690528790156
          vf_explained_var: 0.24079614877700806
          vf_loss: 0.01676960141501493
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_trained: 262000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,262,6130.91,262000,-2.9266,-2.16,-3.67,292.66


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-10-24_09-50-20
  done: false
  episode_len_mean: 296.42
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.964199999999981
  episode_reward_min: -4.689999999999944
  episodes_this_iter: 3
  episodes_total: 828
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008342742919921875
          cur_lr: 5.000000000000001e-05
          entropy: 2.009610546959771
          entropy_coeff: 0.009999999999999998
          kl: 0.011680892082561517
          policy_loss: -0.0013419845037990147
          total_loss: -0.008872084816296895
          vf_explained_var: -0.1434735655784607
          vf_loss: 0.012556262621688397
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_trained: 26300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,263,6150.11,263000,-2.9642,-2.16,-4.69,296.42


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-10-24_09-50-38
  done: false
  episode_len_mean: 299.05
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.9904999999999804
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 2
  episodes_total: 830
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008342742919921875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7542042785220675
          entropy_coeff: 0.009999999999999998
          kl: 0.011839807452736788
          policy_loss: -0.052509689620799486
          total_loss: -0.06000116599930657
          vf_explained_var: 0.5761417150497437
          vf_loss: 0.010040686420527183
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 264000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,264,6168.22,264000,-2.9905,-2.16,-4.7,299.05


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-10-24_09-50-57
  done: false
  episode_len_mean: 303.14
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -3.0313999999999792
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 833
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008342742919921875
          cur_lr: 5.000000000000001e-05
          entropy: 1.7032871696684095
          entropy_coeff: 0.009999999999999998
          kl: 0.0243787233704002
          policy_loss: 0.07880658093425963
          total_loss: 0.07122831485337681
          vf_explained_var: 0.34951066970825195
          vf_loss: 0.009434270427542893
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_trained: 265000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,265,6187.01,265000,-3.0314,-2.16,-4.7,303.14


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-10-24_09-51-17
  done: false
  episode_len_mean: 304.99
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -3.049899999999979
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 2
  episodes_total: 835
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0012514114379882815
          cur_lr: 5.000000000000001e-05
          entropy: 1.6071575654877557
          entropy_coeff: 0.009999999999999998
          kl: 0.034309678445562375
          policy_loss: -0.18660563876231512
          total_loss: -0.1927170784937011
          vf_explained_var: 0.687069296836853
          vf_loss: 0.009917198695216536
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained: 266000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,266,6207.24,266000,-3.0499,-2.16,-4.7,304.99


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-10-24_09-51-38
  done: false
  episode_len_mean: 306.75
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -3.0674999999999786
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 839
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 1.1992564479509988
          entropy_coeff: 0.009999999999999998
          kl: 0.007466039309543327
          policy_loss: -0.017314413603809146
          total_loss: -0.015327742613024182
          vf_explained_var: 0.4927230775356293
          vf_loss: 0.013965223067336613
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 26700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,267,6228.68,267000,-3.0675,-2.16,-4.7,306.75




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-10-24_09-52-19
  done: false
  episode_len_mean: 306.98
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -3.0697999999999785
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 842
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 1.0620182500945197
          entropy_coeff: 0.009999999999999998
          kl: 0.009090799448297639
          policy_loss: 0.0634146726793713
          total_loss: 0.062212695015801324
          vf_explained_var: 0.5825045108795166
          vf_loss: 0.009401138701812467
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 268000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,268,6269.42,268000,-3.0698,-2.16,-4.7,306.98


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-10-24_09-52-42
  done: false
  episode_len_mean: 308.07
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -3.0806999999999776
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 845
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 0.8912000868055555
          entropy_coeff: 0.009999999999999998
          kl: 0.00553081294094656
          policy_loss: -0.040292333645953075
          total_loss: -0.03716717205113835
          vf_explained_var: 0.27507346868515015
          vf_loss: 0.01202677869134479
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_trained: 269000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,269,6292.04,269000,-3.0807,-2.16,-4.7,308.07


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-10-24_09-53-05
  done: false
  episode_len_mean: 309.13
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -3.0912999999999773
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 849
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 0.8967794842190213
          entropy_coeff: 0.009999999999999998
          kl: 0.008177200784159193
          policy_loss: 0.016769423335790633
          total_loss: 0.021828778377837605
          vf_explained_var: 0.31060850620269775
          vf_loss: 0.014011802648504575
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,270,6315.57,270000,-3.0913,-2.16,-4.7,309.13


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-10-24_09-53-30
  done: false
  episode_len_mean: 310.1
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -3.1009999999999773
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 852
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 0.8002208941512637
          entropy_coeff: 0.009999999999999998
          kl: 0.006397449932409612
          policy_loss: -0.11536857924527592
          total_loss: -0.10991485218207041
          vf_explained_var: 0.23956073820590973
          vf_loss: 0.01344392995039622
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained: 271000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,271,6340.14,271000,-3.101,-2.44,-4.7,310.1


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-10-24_09-53-53
  done: false
  episode_len_mean: 311.51
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -3.1150999999999773
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 856
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 0.8543402228090499
          entropy_coeff: 0.009999999999999998
          kl: 0.00708658755173032
          policy_loss: 0.0483082115650177
          total_loss: 0.05106362832917107
          vf_explained_var: 0.1758381724357605
          vf_loss: 0.01128551717588885
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 272000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,272,6363.67,272000,-3.1151,-2.5,-4.7,311.51


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-10-24_09-54-18
  done: false
  episode_len_mean: 311.91
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.1190999999999782
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 860
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 0.8405386924743652
          entropy_coeff: 0.009999999999999998
          kl: 0.016158462174866075
          policy_loss: 0.04513773065474298
          total_loss: 0.04988320875498983
          vf_explained_var: 0.04122493788599968
          vf_loss: 0.01312053182369305
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained: 273000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,273,6387.87,273000,-3.1191,-2.55,-4.7,311.91


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-10-24_09-54-43
  done: false
  episode_len_mean: 311.66
  episode_media: {}
  episode_reward_max: -2.489999999999991
  episode_reward_mean: -3.1165999999999774
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 863
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 0.7513547857602437
          entropy_coeff: 0.009999999999999998
          kl: 0.007758527935940091
          policy_loss: -0.09515013032489353
          total_loss: -0.08735802041159736
          vf_explained_var: 0.04386414587497711
          vf_loss: 0.01529109411769443
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 274000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,274,6412.82,274000,-3.1166,-2.49,-4.7,311.66


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-10-24_09-55-08
  done: false
  episode_len_mean: 311.5
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -3.114999999999977
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 867
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 0.7170666972796123
          entropy_coeff: 0.009999999999999998
          kl: 0.017599598713932272
          policy_loss: -0.04255202131138908
          total_loss: -0.03305573893917931
          vf_explained_var: 0.0642705038189888
          vf_loss: 0.016633914881903265
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained: 275000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,275,6438.63,275000,-3.115,-2.44,-4.7,311.5




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-10-24_09-55-51
  done: false
  episode_len_mean: 310.18
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.1017999999999777
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 871
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018771171569824212
          cur_lr: 5.000000000000001e-05
          entropy: 0.8344167199399736
          entropy_coeff: 0.009999999999999998
          kl: 0.00423065267879728
          policy_loss: -0.1150497919983334
          total_loss: -0.10438452313343684
          vf_explained_var: 0.13489045202732086
          vf_loss: 0.019001493665079274
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained: 276000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,276,6481.39,276000,-3.1018,-2.24,-4.7,310.18


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-10-24_09-56-18
  done: false
  episode_len_mean: 308.8
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.087999999999978
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 875
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 0.7306432803471883
          entropy_coeff: 0.009999999999999998
          kl: 0.0062474405697575406
          policy_loss: -0.10526559154192607
          total_loss: -0.09584599344266785
          vf_explained_var: 0.16280092298984528
          vf_loss: 0.01672016933767332
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained: 277000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,277,6507.68,277000,-3.088,-2.24,-4.7,308.8


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-10-24_09-56-44
  done: false
  episode_len_mean: 305.87
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.0586999999999778
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 5
  episodes_total: 880
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 0.7388110909197065
          entropy_coeff: 0.009999999999999998
          kl: 0.013716297967075599
          policy_loss: -0.016139251159297097
          total_loss: -0.009412522407041655
          vf_explained_var: 0.2772153913974762
          vf_loss: 0.014101966046210793
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained: 27800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,278,6534.28,278000,-3.0587,-2.24,-4.7,305.87


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-10-24_09-57-10
  done: false
  episode_len_mean: 303.14
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.0313999999999792
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 884
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 0.816157865524292
          entropy_coeff: 0.009999999999999998
          kl: 0.016815412731005393
          policy_loss: 0.0024512574076652528
          total_loss: 0.00481951062877973
          vf_explained_var: 0.4290279746055603
          vf_loss: 0.010514050359941192
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained: 279000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,279,6560.38,279000,-3.0314,-2.24,-4.7,303.14


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-10-24_09-57-36
  done: false
  episode_len_mean: 301.65
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.0164999999999793
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 887
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 0.949845693508784
          entropy_coeff: 0.009999999999999998
          kl: 0.010443467873205412
          policy_loss: -0.09952030728260676
          total_loss: -0.09854679356018702
          vf_explained_var: 0.48277512192726135
          vf_loss: 0.010462168272998599
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,280,6585.91,280000,-3.0165,-2.24,-4.7,301.65


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-10-24_09-58-00
  done: false
  episode_len_mean: 298.49
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.98489999999998
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 891
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.0375919494363997
          entropy_coeff: 0.009999999999999998
          kl: 0.015154653253575942
          policy_loss: -0.046325044416719016
          total_loss: -0.0487883637762732
          vf_explained_var: 0.6602362990379333
          vf_loss: 0.007898375645486845
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained: 281000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,281,6610.32,281000,-2.9849,-2.24,-4.7,298.49


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-10-24_09-58-26
  done: false
  episode_len_mean: 296.52
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.9651999999999803
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 895
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.0563539107640585
          entropy_coeff: 0.009999999999999998
          kl: 0.011028968424538653
          policy_loss: 0.0034592580050230026
          total_loss: 0.0008148627562655343
          vf_explained_var: 0.7523831129074097
          vf_loss: 0.007908790832799342
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained: 28200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,282,6636.04,282000,-2.9652,-2.24,-4.7,296.52


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-10-24_09-58-51
  done: false
  episode_len_mean: 295.48
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.9547999999999814
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 899
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.151063428322474
          entropy_coeff: 0.009999999999999998
          kl: 0.007338253370726496
          policy_loss: 0.0169158602754275
          total_loss: 0.012131714324156443
          vf_explained_var: 0.8011374473571777
          vf_loss: 0.006719600533445677
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_trained: 283000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,283,6660.96,283000,-2.9548,-2.24,-4.7,295.48




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-10-24_09-59-32
  done: false
  episode_len_mean: 295.1
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.9509999999999805
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 902
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.5066039972835117
          entropy_coeff: 0.009999999999999998
          kl: 0.011353435243018042
          policy_loss: 0.041424186527729036
          total_loss: 0.03195735663175583
          vf_explained_var: 0.8212405443191528
          vf_loss: 0.005588556353985849
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 284000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,284,6701.91,284000,-2.951,-2.24,-4.7,295.1


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-10-24_09-59-57
  done: false
  episode_len_mean: 293.9
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.938999999999981
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 906
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.4201854798528883
          entropy_coeff: 0.009999999999999998
          kl: 0.013381430728532262
          policy_loss: 0.03394714925024245
          total_loss: 0.024798532906505796
          vf_explained_var: 0.8881040215492249
          vf_loss: 0.005040679313242436
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_trained: 285000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,285,6726.52,285000,-2.939,-2.24,-4.7,293.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-10-24_10-00-21
  done: false
  episode_len_mean: 292.84
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.928399999999981
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 909
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.4515428205331167
          entropy_coeff: 0.009999999999999998
          kl: 0.018592192185189163
          policy_loss: 0.03891281858086586
          total_loss: 0.029255757894780902
          vf_explained_var: 0.8827378749847412
          vf_loss: 0.004840917342031996
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained: 286000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,286,6751.24,286000,-2.9284,-2.24,-4.7,292.84


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-10-24_10-00-46
  done: false
  episode_len_mean: 291.9
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.9189999999999814
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 913
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.2437067892816331
          entropy_coeff: 0.009999999999999998
          kl: 0.007365719618429692
          policy_loss: -0.024110800276199978
          total_loss: -0.02670218182934655
          vf_explained_var: 0.7789561152458191
          vf_loss: 0.009838774686472283
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained: 287000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,287,6775.91,287000,-2.919,-2.24,-4.7,291.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-10-24_10-01-11
  done: false
  episode_len_mean: 289.38
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.8937999999999824
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 916
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009385585784912106
          cur_lr: 5.000000000000001e-05
          entropy: 1.149012835820516
          entropy_coeff: 0.009999999999999998
          kl: 0.027725225494963014
          policy_loss: -0.0030988075253036286
          total_loss: -0.0064649121628867256
          vf_explained_var: 0.7285565733909607
          vf_loss: 0.008098003012128175
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 2880

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,288,6801.3,288000,-2.8938,-2.24,-4.7,289.38


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-10-24_10-01-36
  done: false
  episode_len_mean: 287.87
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.8786999999999825
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 920
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0014078378677368168
          cur_lr: 5.000000000000001e-05
          entropy: 1.038434413406584
          entropy_coeff: 0.009999999999999998
          kl: 0.009333409243461904
          policy_loss: 0.0064425391455491384
          total_loss: 0.005948039972119861
          vf_explained_var: 0.640798807144165
          vf_loss: 0.009876705954472225
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_trained: 289000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,289,6825.57,289000,-2.8787,-2.24,-4.7,287.87


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-10-24_10-01-59
  done: false
  episode_len_mean: 287.24
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.8723999999999825
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 3
  episodes_total: 923
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0014078378677368168
          cur_lr: 5.000000000000001e-05
          entropy: 0.8288607292705112
          entropy_coeff: 0.009999999999999998
          kl: 0.012778713222464372
          policy_loss: 0.05457076869077153
          total_loss: 0.052536389893955655
          vf_explained_var: 0.42652618885040283
          vf_loss: 0.006236242264923122
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 290000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,290,6848.99,290000,-2.8724,-2.24,-4.7,287.24


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-10-24_10-02-23
  done: false
  episode_len_mean: 283.67
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.8366999999999836
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 927
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0014078378677368168
          cur_lr: 5.000000000000001e-05
          entropy: 0.8765174965063731
          entropy_coeff: 0.009999999999999998
          kl: 0.007890895426256733
          policy_loss: -0.001959945178694195
          total_loss: -0.002724985736939642
          vf_explained_var: 0.5217256546020508
          vf_loss: 0.007989026719911231
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_trained: 29100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,291,6873.06,291000,-2.8367,-2.24,-4.7,283.67




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-10-24_10-03-06
  done: false
  episode_len_mean: 278.79
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7878999999999836
  episode_reward_min: -4.699999999999944
  episodes_this_iter: 4
  episodes_total: 931
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0014078378677368168
          cur_lr: 5.000000000000001e-05
          entropy: 0.8750265214178298
          entropy_coeff: 0.009999999999999998
          kl: 0.007846042202929107
          policy_loss: -0.019692500390940244
          total_loss: -0.016179453374611005
          vf_explained_var: 0.23055152595043182
          vf_loss: 0.012252267388006052
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained: 2920

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,292,6915.61,292000,-2.7879,-2.24,-4.7,278.79


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-10-24_10-03-32
  done: false
  episode_len_mean: 273.38
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7337999999999854
  episode_reward_min: -3.609999999999967
  episodes_this_iter: 4
  episodes_total: 935
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0014078378677368168
          cur_lr: 5.000000000000001e-05
          entropy: 0.6885493901040819
          entropy_coeff: 0.009999999999999998
          kl: 0.004884144194511684
          policy_loss: -0.014779128051466412
          total_loss: -0.011132685674561394
          vf_explained_var: 0.3448481857776642
          vf_loss: 0.010525061593701443
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 29300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,293,6941.75,293000,-2.7338,-2.24,-3.61,273.38


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-10-24_10-03-58
  done: false
  episode_len_mean: 270.76
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7075999999999865
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 938
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007039189338684084
          cur_lr: 5.000000000000001e-05
          entropy: 0.6733316388395097
          entropy_coeff: 0.009999999999999998
          kl: 0.015598066439929425
          policy_loss: -0.09658333775069979
          total_loss: -0.09194874539971351
          vf_explained_var: 0.2363084852695465
          vf_loss: 0.011356927785608503
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained: 294000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,294,6967.31,294000,-2.7076,-2.24,-3.33,270.76


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-10-24_10-04-22
  done: false
  episode_len_mean: 269.46
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.6945999999999857
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 942
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007039189338684084
          cur_lr: 5.000000000000001e-05
          entropy: 0.7416695449087355
          entropy_coeff: 0.009999999999999998
          kl: 0.036200388656365705
          policy_loss: -0.031144380900594924
          total_loss: -0.02810153224402004
          vf_explained_var: 0.22800792753696442
          vf_loss: 0.010434064155237541
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 29500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,295,6992.01,295000,-2.6946,-2.24,-3.33,269.46


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-10-24_10-04-44
  done: false
  episode_len_mean: 269.17
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.6916999999999867
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 946
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010558784008026126
          cur_lr: 5.000000000000001e-05
          entropy: 0.9724959797329373
          entropy_coeff: 0.009999999999999998
          kl: 0.008819038462538151
          policy_loss: -0.006035883186591996
          total_loss: -0.0030544977635145187
          vf_explained_var: 0.1919991672039032
          vf_loss: 0.012697033201240831
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 2960

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,296,7014.05,296000,-2.6917,-2.24,-3.33,269.17


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-10-24_10-05-10
  done: false
  episode_len_mean: 268.36
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.683599999999986
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 949
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010558784008026126
          cur_lr: 5.000000000000001e-05
          entropy: 0.6376617319054074
          entropy_coeff: 0.009999999999999998
          kl: 0.008066584993360751
          policy_loss: -0.09088224768638611
          total_loss: -0.08510082587599754
          vf_explained_var: 0.10645461827516556
          vf_loss: 0.012149518076330423
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained: 297000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,297,7039.93,297000,-2.6836,-2.24,-3.33,268.36


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-10-24_10-05-32
  done: false
  episode_len_mean: 268.96
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.6895999999999862
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 953
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010558784008026126
          cur_lr: 5.000000000000001e-05
          entropy: 0.7985936820507049
          entropy_coeff: 0.009999999999999998
          kl: 0.012366845965105986
          policy_loss: 0.054958919021818375
          total_loss: 0.05673690363764763
          vf_explained_var: 0.04723800718784332
          vf_loss: 0.009750861840115654
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 298000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,298,7061.8,298000,-2.6896,-2.24,-3.33,268.96


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-10-24_10-05-57
  done: false
  episode_len_mean: 268.02
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.6801999999999873
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 957
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010558784008026126
          cur_lr: 5.000000000000001e-05
          entropy: 0.7463649895456103
          entropy_coeff: 0.009999999999999998
          kl: 0.006482025349649787
          policy_loss: 0.014371275073952146
          total_loss: 0.02013580103715261
          vf_explained_var: 0.06536143273115158
          vf_loss: 0.013221334334876803
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained: 299000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,299,7086.93,299000,-2.6802,-2.24,-3.33,268.02




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-10-24_10-06-40
  done: false
  episode_len_mean: 267.88
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.6787999999999874
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 960
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010558784008026126
          cur_lr: 5.000000000000001e-05
          entropy: 0.7249318874544568
          entropy_coeff: 0.009999999999999998
          kl: 0.015739559453733894
          policy_loss: -0.10250056154198117
          total_loss: -0.09640490619672669
          vf_explained_var: 0.09848188608884811
          vf_loss: 0.013328351887563865
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 300000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,300,7129.39,300000,-2.6788,-2.24,-3.33,267.88


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-10-24_10-07-01
  done: false
  episode_len_mean: 269.51
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.6950999999999867
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 4
  episodes_total: 964
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010558784008026126
          cur_lr: 5.000000000000001e-05
          entropy: 1.053720157676273
          entropy_coeff: 0.009999999999999998
          kl: 0.006820351853287102
          policy_loss: 0.009491446365912756
          total_loss: 0.013143770313925213
          vf_explained_var: 0.040692251175642014
          vf_loss: 0.014182323093215625
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained: 301000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,301,7150.89,301000,-2.6951,-2.24,-3.33,269.51


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-10-24_10-07-22
  done: false
  episode_len_mean: 271.48
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -2.7147999999999866
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 967
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0010558784008026126
          cur_lr: 5.000000000000001e-05
          entropy: 1.0624422828356426
          entropy_coeff: 0.009999999999999998
          kl: 0.020313788085901764
          policy_loss: 0.059706243044800225
          total_loss: 0.05880995152725114
          vf_explained_var: -0.23608729243278503
          vf_loss: 0.009706680856955549
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 302000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,302,7171.71,302000,-2.7148,-2.24,-3.33,271.48


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-10-24_10-07-44
  done: false
  episode_len_mean: 273.19
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.7318999999999853
  episode_reward_min: -3.329999999999973
  episodes_this_iter: 3
  episodes_total: 970
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015838176012039178
          cur_lr: 5.000000000000001e-05
          entropy: 1.102066460582945
          entropy_coeff: 0.009999999999999998
          kl: 0.005899731583368073
          policy_loss: 0.06601527929306031
          total_loss: 0.0656431860393948
          vf_explained_var: 0.023311885073781013
          vf_loss: 0.010639226474126594
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained: 303000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,303,7193.88,303000,-2.7319,-2.39,-3.33,273.19


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-10-24_10-08-04
  done: false
  episode_len_mean: 275.91
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.7590999999999855
  episode_reward_min: -3.759999999999964
  episodes_this_iter: 3
  episodes_total: 973
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015838176012039178
          cur_lr: 5.000000000000001e-05
          entropy: 1.2314035223590003
          entropy_coeff: 0.009999999999999998
          kl: 0.00700600856204553
          policy_loss: 0.05978641543123457
          total_loss: 0.058050600356525844
          vf_explained_var: -0.2666687071323395
          vf_loss: 0.010567123928598852
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 304000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,304,7213.47,304000,-2.7591,-2.39,-3.76,275.91


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-10-24_10-08-23
  done: false
  episode_len_mean: 279.44
  episode_media: {}
  episode_reward_max: -2.389999999999993
  episode_reward_mean: -2.794399999999984
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 976
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015838176012039178
          cur_lr: 5.000000000000001e-05
          entropy: 1.2861849281522963
          entropy_coeff: 0.009999999999999998
          kl: 0.011881724895991791
          policy_loss: 0.09512430048651166
          total_loss: 0.09006282852755652
          vf_explained_var: 0.2968853712081909
          vf_loss: 0.007781562277361647
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 305000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,305,7232.77,305000,-2.7944,-2.39,-3.85,279.44


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-10-24_10-08-43
  done: false
  episode_len_mean: 281.57
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.8156999999999837
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 2
  episodes_total: 978
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015838176012039178
          cur_lr: 5.000000000000001e-05
          entropy: 1.2898368848694695
          entropy_coeff: 0.009999999999999998
          kl: 0.00881104717230661
          policy_loss: -0.08301386568281385
          total_loss: -0.0837062292628818
          vf_explained_var: 0.10130658745765686
          vf_loss: 0.012192050388289823
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_trained: 306000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,306,7252.83,306000,-2.8157,-2.4,-3.85,281.57


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-10-24_10-09-04
  done: false
  episode_len_mean: 284.99
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.849899999999983
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 982
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015838176012039178
          cur_lr: 5.000000000000001e-05
          entropy: 1.0959353970156775
          entropy_coeff: 0.009999999999999998
          kl: 0.008607398420799032
          policy_loss: 0.0305445349878735
          total_loss: 0.03389682935343848
          vf_explained_var: 0.1710204780101776
          vf_loss: 0.0142980194857551
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained: 307000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,307,7273.9,307000,-2.8499,-2.4,-3.85,284.99


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-10-24_10-09-26
  done: false
  episode_len_mean: 286.37
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.8636999999999824
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 985
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015838176012039178
          cur_lr: 5.000000000000001e-05
          entropy: 1.0082478357685938
          entropy_coeff: 0.009999999999999998
          kl: 0.007282568403064241
          policy_loss: 0.05537277791235182
          total_loss: 0.05548918429348204
          vf_explained_var: 0.31005343794822693
          vf_loss: 0.010187350529142552
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 308000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,308,7295.95,308000,-2.8637,-2.4,-3.85,286.37


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-10-24_10-09-50
  done: false
  episode_len_mean: 286.66
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.8665999999999827
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 988
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0015838176012039178
          cur_lr: 5.000000000000001e-05
          entropy: 0.7222506278091007
          entropy_coeff: 0.009999999999999998
          kl: 0.02313983958033232
          policy_loss: -0.10007905165354411
          total_loss: -0.09624920040369034
          vf_explained_var: 0.3895309865474701
          vf_loss: 0.011015707864943478
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained: 309000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,309,7319.92,309000,-2.8666,-2.4,-3.85,286.66




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-10-24_10-10-33
  done: false
  episode_len_mean: 287.3
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8729999999999825
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 992
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0023757264018058784
          cur_lr: 5.000000000000001e-05
          entropy: 0.8940497464603848
          entropy_coeff: 0.009999999999999998
          kl: 0.010724296195940753
          policy_loss: -0.04536381479766634
          total_loss: -0.03890109004245864
          vf_explained_var: 0.3246677815914154
          vf_loss: 0.015377744479984459
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained: 310000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,310,7362.4,310000,-2.873,-2.19,-3.85,287.3


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-10-24_10-10-58
  done: false
  episode_len_mean: 287.55
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8754999999999824
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 996
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0023757264018058784
          cur_lr: 5.000000000000001e-05
          entropy: 0.9660017251968384
          entropy_coeff: 0.009999999999999998
          kl: 0.009405207536804343
          policy_loss: 0.014790188603931003
          total_loss: 0.01677316307193703
          vf_explained_var: 0.5312321186065674
          vf_loss: 0.011620642503516541
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_trained: 311000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,311,7387.44,311000,-2.8755,-2.19,-3.85,287.55


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-10-24_10-11-23
  done: false
  episode_len_mean: 287.22
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8721999999999825
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1000
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0023757264018058784
          cur_lr: 5.000000000000001e-05
          entropy: 0.8330606129434374
          entropy_coeff: 0.009999999999999998
          kl: 0.005100244580115051
          policy_loss: 0.021887709117598003
          total_loss: 0.024550952224267855
          vf_explained_var: 0.5704025626182556
          vf_loss: 0.01098172999918461
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 312000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,312,7412.53,312000,-2.8722,-2.19,-3.85,287.22


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-10-24_10-11-49
  done: false
  episode_len_mean: 285.36
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8535999999999833
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1004
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0023757264018058784
          cur_lr: 5.000000000000001e-05
          entropy: 0.7980451689826118
          entropy_coeff: 0.009999999999999998
          kl: 0.0034872184334000323
          policy_loss: 0.02794928550720215
          total_loss: 0.030624894756409858
          vf_explained_var: 0.5855082869529724
          vf_loss: 0.010647774332513411
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_trained: 313000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,313,7438.51,313000,-2.8536,-2.19,-3.85,285.36


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-10-24_10-12-14
  done: false
  episode_len_mean: 284.35
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8434999999999833
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1008
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0011878632009029392
          cur_lr: 5.000000000000001e-05
          entropy: 0.9102366752094693
          entropy_coeff: 0.009999999999999998
          kl: 0.004773960941620656
          policy_loss: 0.017784406493107478
          total_loss: 0.01826551407575607
          vf_explained_var: 0.672303318977356
          vf_loss: 0.009577803117119603
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained: 314000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,314,7463.84,314000,-2.8435,-2.19,-3.85,284.35


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-10-24_10-12-40
  done: false
  episode_len_mean: 282.82
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.828199999999984
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1012
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005939316004514696
          cur_lr: 5.000000000000001e-05
          entropy: 0.8936722808414035
          entropy_coeff: 0.009999999999999998
          kl: 0.006846976571173257
          policy_loss: 0.002359430988629659
          total_loss: 0.002643378617035018
          vf_explained_var: 0.6691498756408691
          vf_loss: 0.009216600252936283
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_trained: 315000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,315,7489.7,315000,-2.8282,-2.19,-3.85,282.82


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-10-24_10-13-03
  done: false
  episode_len_mean: 283.57
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.835699999999983
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 1015
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005939316004514696
          cur_lr: 5.000000000000001e-05
          entropy: 1.336514304081599
          entropy_coeff: 0.009999999999999998
          kl: 0.011391683562851461
          policy_loss: 0.04625353566888306
          total_loss: 0.04067725980033477
          vf_explained_var: 0.5781882405281067
          vf_loss: 0.007782102937603163
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 316000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,316,7512.62,316000,-2.8357,-2.19,-3.85,283.57


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-10-24_10-13-27
  done: false
  episode_len_mean: 283.65
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8364999999999823
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 1018
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005939316004514696
          cur_lr: 5.000000000000001e-05
          entropy: 0.9477247926923964
          entropy_coeff: 0.009999999999999998
          kl: 0.011271630893229556
          policy_loss: -0.12601536959409715
          total_loss: -0.12104318680034744
          vf_explained_var: 0.47870945930480957
          vf_loss: 0.014442740711900923
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 317000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,317,7536.68,317000,-2.8365,-2.19,-3.85,283.65




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-10-24_10-14-11
  done: false
  episode_len_mean: 282.64
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8263999999999836
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1022
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005939316004514696
          cur_lr: 5.000000000000001e-05
          entropy: 0.9154674132664998
          entropy_coeff: 0.009999999999999998
          kl: 0.015882840081210107
          policy_loss: -0.11879751988583141
          total_loss: -0.1128132849931717
          vf_explained_var: 0.5622662305831909
          vf_loss: 0.01512947724097305
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained: 318000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,318,7580.22,318000,-2.8264,-2.19,-3.85,282.64


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-10-24_10-14-35
  done: false
  episode_len_mean: 282.07
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8206999999999836
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1026
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0005939316004514696
          cur_lr: 5.000000000000001e-05
          entropy: 1.2803728342056275
          entropy_coeff: 0.009999999999999998
          kl: 0.028755125612868446
          policy_loss: 0.018847517917553583
          total_loss: 0.013782194587919447
          vf_explained_var: 0.725818395614624
          vf_loss: 0.007721324792752663
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained: 319000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,319,7604.16,319000,-2.8207,-2.19,-3.85,282.07


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-10-24_10-15-00
  done: false
  episode_len_mean: 282.51
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.825099999999984
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1030
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008908974006772042
          cur_lr: 5.000000000000001e-05
          entropy: 1.0053914719157748
          entropy_coeff: 0.009999999999999998
          kl: 0.009451595267986768
          policy_loss: 0.006758826474348704
          total_loss: 0.00364161878824234
          vf_explained_var: 0.8498336672782898
          vf_loss: 0.006928287146406041
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 320000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,320,7628.78,320000,-2.8251,-2.19,-3.85,282.51


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-10-24_10-15-23
  done: false
  episode_len_mean: 283.44
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8343999999999823
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 1033
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008908974006772042
          cur_lr: 5.000000000000001e-05
          entropy: 0.9009231236245897
          entropy_coeff: 0.009999999999999998
          kl: 0.005882508084124143
          policy_loss: 0.08313528117206362
          total_loss: 0.08087713826033804
          vf_explained_var: 0.8014566898345947
          vf_loss: 0.006745848070649017
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained: 321000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,321,7652.55,321000,-2.8344,-2.19,-3.85,283.44


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-10-24_10-15-48
  done: false
  episode_len_mean: 283.83
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.838299999999982
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1037
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008908974006772042
          cur_lr: 5.000000000000001e-05
          entropy: 0.7910535898473527
          entropy_coeff: 0.009999999999999998
          kl: 0.007911890646914483
          policy_loss: 0.05659096240997315
          total_loss: 0.05613264267643293
          vf_explained_var: 0.7560557723045349
          vf_loss: 0.007445166585966945
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained: 322000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,322,7677.3,322000,-2.8383,-2.19,-3.85,283.83


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-10-24_10-16-12
  done: false
  episode_len_mean: 284.28
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.842799999999983
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 1040
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008908974006772042
          cur_lr: 5.000000000000001e-05
          entropy: 0.9224457442760468
          entropy_coeff: 0.009999999999999998
          kl: 0.010128601857293676
          policy_loss: -0.10282689481973648
          total_loss: -0.09912344184186724
          vf_explained_var: 0.5054841637611389
          vf_loss: 0.012918888849930631
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained: 323000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,323,7701.22,323000,-2.8428,-2.19,-3.85,284.28


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-10-24_10-16-36
  done: false
  episode_len_mean: 284.56
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.845599999999983
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1044
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008908974006772042
          cur_lr: 5.000000000000001e-05
          entropy: 0.7730143825213115
          entropy_coeff: 0.009999999999999998
          kl: 0.0077112477663806404
          policy_loss: 0.03733788463804457
          total_loss: 0.041530891507864
          vf_explained_var: 0.36676928400993347
          vf_loss: 0.011916280703412161
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained: 324000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,324,7725.12,324000,-2.8456,-2.19,-3.85,284.56


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-10-24_10-17-01
  done: false
  episode_len_mean: 284.17
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.841699999999983
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1048
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008908974006772042
          cur_lr: 5.000000000000001e-05
          entropy: 0.685366051726871
          entropy_coeff: 0.009999999999999998
          kl: 0.012902184183521598
          policy_loss: 0.018216562188333934
          total_loss: 0.023747729096147748
          vf_explained_var: 0.3294861912727356
          vf_loss: 0.012373332461963097
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained: 325000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,325,7750.43,325000,-2.8417,-2.19,-3.85,284.17




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-10-24_10-17-43
  done: false
  episode_len_mean: 283.17
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8316999999999832
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1052
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008908974006772042
          cur_lr: 5.000000000000001e-05
          entropy: 0.747075006696913
          entropy_coeff: 0.009999999999999998
          kl: 0.011107101230263242
          policy_loss: 0.02350622796350055
          total_loss: 0.027559363345305125
          vf_explained_var: 0.2744276821613312
          vf_loss: 0.011513989791274071
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 326000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,326,7792.53,326000,-2.8317,-2.19,-3.85,283.17


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-10-24_10-18-09
  done: false
  episode_len_mean: 282.41
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.824099999999983
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1056
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008908974006772042
          cur_lr: 5.000000000000001e-05
          entropy: 0.7060695654816098
          entropy_coeff: 0.009999999999999998
          kl: 0.010550471810026826
          policy_loss: -0.015607431448168224
          total_loss: -0.011261071181959577
          vf_explained_var: 0.4016490876674652
          vf_loss: 0.011397656229221158
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trained: 327000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,327,7818.14,327000,-2.8241,-2.19,-3.85,282.41


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-10-24_10-18-33
  done: false
  episode_len_mean: 283.48
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.834799999999983
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 1059
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008908974006772042
          cur_lr: 5.000000000000001e-05
          entropy: 0.9310413281122843
          entropy_coeff: 0.009999999999999998
          kl: 0.008510235333716152
          policy_loss: 0.051346834831767614
          total_loss: 0.05157992541790009
          vf_explained_var: 0.3629157543182373
          vf_loss: 0.009535920594094529
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 328000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,328,7841.93,328000,-2.8348,-2.19,-3.85,283.48


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-10-24_10-18-56
  done: false
  episode_len_mean: 282.32
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8231999999999835
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 4
  episodes_total: 1063
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008908974006772042
          cur_lr: 5.000000000000001e-05
          entropy: 1.1065880715847016
          entropy_coeff: 0.009999999999999998
          kl: 0.038061410249862995
          policy_loss: 0.009464953260289299
          total_loss: 0.011273640725347731
          vf_explained_var: 0.3875941038131714
          vf_loss: 0.01284065731904573
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained: 329000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,329,7865.24,329000,-2.8232,-2.19,-3.85,282.32


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-10-24_10-19-19
  done: false
  episode_len_mean: 282.13
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8212999999999835
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 1066
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 1.143167488442527
          entropy_coeff: 0.009999999999999998
          kl: 0.0077366285154403
          policy_loss: 0.01880914626850022
          total_loss: 0.018457599315378402
          vf_explained_var: 0.3762354254722595
          vf_loss: 0.011069790993092788
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 330000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,330,7887.8,330000,-2.8213,-2.19,-3.85,282.13


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-10-24_10-19-40
  done: false
  episode_len_mean: 282.64
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8263999999999836
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 1069
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 1.4395073983404372
          entropy_coeff: 0.009999999999999998
          kl: 0.01673873498944434
          policy_loss: 0.028619059258037143
          total_loss: 0.02597505831056171
          vf_explained_var: 0.2025834172964096
          vf_loss: 0.011728703736379329
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_trained: 331000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,331,7908.77,331000,-2.8264,-2.19,-3.85,282.64


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-10-24_10-19-59
  done: false
  episode_len_mean: 283.08
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8307999999999827
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 1072
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 1.3361236559020149
          entropy_coeff: 0.009999999999999998
          kl: 0.009240266902977782
          policy_loss: 0.010999402734968397
          total_loss: 0.008508448468314276
          vf_explained_var: -0.18830062448978424
          vf_loss: 0.010857936051777668
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_trained: 332000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,332,7928.24,332000,-2.8308,-2.19,-3.85,283.08


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-10-24_10-20-20
  done: false
  episode_len_mean: 282.76
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8275999999999835
  episode_reward_min: -3.849999999999962
  episodes_this_iter: 3
  episodes_total: 1075
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 1.3225482940673827
          entropy_coeff: 0.009999999999999998
          kl: 0.00868103246651199
          policy_loss: 0.04905705708596442
          total_loss: 0.046400895218054454
          vf_explained_var: 0.12350820749998093
          vf_loss: 0.010557722584861848
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trained: 333000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,333,7948.82,333000,-2.8276,-2.19,-3.85,282.76


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-10-24_10-20-40
  done: false
  episode_len_mean: 282.32
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8231999999999844
  episode_reward_min: -3.8299999999999623
  episodes_this_iter: 3
  episodes_total: 1078
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 1.2591666062672933
          entropy_coeff: 0.009999999999999998
          kl: 0.008428384464431421
          policy_loss: 0.04832050783766641
          total_loss: 0.04704178886281119
          vf_explained_var: 0.1041119247674942
          vf_loss: 0.011301684126050936
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trained: 334000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,334,7968.61,334000,-2.8232,-2.19,-3.83,282.32




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-10-24_10-21-18
  done: false
  episode_len_mean: 282.14
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.821399999999984
  episode_reward_min: -3.8299999999999623
  episodes_this_iter: 3
  episodes_total: 1081
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 1.2592759529749553
          entropy_coeff: 0.009999999999999998
          kl: 0.00973834957024665
          policy_loss: 0.04615991794400745
          total_loss: 0.04414347501264678
          vf_explained_var: 0.06161157786846161
          vf_loss: 0.010563300565182645
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_trained: 335000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,335,8007.3,335000,-2.8214,-2.19,-3.83,282.14


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-10-24_10-21-36
  done: false
  episode_len_mean: 283.46
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.834599999999983
  episode_reward_min: -4.019999999999959
  episodes_this_iter: 2
  episodes_total: 1083
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 1.4362934072812399
          entropy_coeff: 0.009999999999999998
          kl: 0.016361776684405912
          policy_loss: -0.05238017299109035
          total_loss: -0.06037904396653175
          vf_explained_var: 0.22034505009651184
          vf_loss: 0.006342196007446748
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 336000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,336,8025.16,336000,-2.8346,-2.19,-4.02,283.46


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-10-24_10-21-55
  done: false
  episode_len_mean: 286.17
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8616999999999835
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 3
  episodes_total: 1086
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 1.357860869831509
          entropy_coeff: 0.009999999999999998
          kl: 0.0073992709128898725
          policy_loss: -0.09094295253356298
          total_loss: -0.08625168022182253
          vf_explained_var: 0.011041659861803055
          vf_loss: 0.018259994292424784
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_trained: 337000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,337,8044.27,337000,-2.8617,-2.19,-4.05,286.17


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-10-24_10-22-14
  done: false
  episode_len_mean: 287.7
  episode_media: {}
  episode_reward_max: -2.1899999999999973
  episode_reward_mean: -2.8769999999999825
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 3
  episodes_total: 1089
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 1.1569264882140688
          entropy_coeff: 0.009999999999999998
          kl: 0.010576679596852854
          policy_loss: -0.02686149858766132
          total_loss: -0.02514772050910526
          vf_explained_var: -0.010645383968949318
          vf_loss: 0.013268912454239196
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained: 338000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,338,8063.38,338000,-2.877,-2.19,-4.05,287.7


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-10-24_10-22-34
  done: false
  episode_len_mean: 290.83
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.9082999999999815
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 3
  episodes_total: 1092
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 1.309127974510193
          entropy_coeff: 0.009999999999999998
          kl: 0.01254172224081979
          policy_loss: 0.047811378869745465
          total_loss: 0.047217609898911585
          vf_explained_var: 0.0019292877987027168
          vf_loss: 0.012480754586350586
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_trained: 339000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,339,8082.52,339000,-2.9083,-2.34,-4.05,290.83


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-10-24_10-22-53
  done: false
  episode_len_mean: 294.0
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.9399999999999813
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 3
  episodes_total: 1095
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 1.4797249952952067
          entropy_coeff: 0.009999999999999998
          kl: 0.006930501107167765
          policy_loss: 0.042119664864407644
          total_loss: 0.03931037916077508
          vf_explained_var: -0.17955128848552704
          vf_loss: 0.01197870429346545
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,340,8101.76,340000,-2.94,-2.34,-4.05,294


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-10-24_10-23-14
  done: false
  episode_len_mean: 295.85
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.958499999999981
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 3
  episodes_total: 1098
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 1.4507539391517639
          entropy_coeff: 0.009999999999999998
          kl: 0.009862798266082566
          policy_loss: 0.034671066453059514
          total_loss: 0.03238337304857042
          vf_explained_var: -0.0749402716755867
          vf_loss: 0.012206665218238615
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trained: 341000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,341,8123.13,341000,-2.9585,-2.34,-4.05,295.85


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-10-24_10-23-33
  done: false
  episode_len_mean: 298.01
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -2.98009999999998
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 2
  episodes_total: 1100
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 1.5537692493862576
          entropy_coeff: 0.009999999999999998
          kl: 0.016916027133718595
          policy_loss: -0.09651948461929957
          total_loss: -0.10085399465428459
          vf_explained_var: 0.06421569734811783
          vf_loss: 0.011180575921510656
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_trained: 342000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,342,8142.22,342000,-2.9801,-2.34,-4.05,298.01


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-10-24_10-23-54
  done: false
  episode_len_mean: 300.87
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.00869999999998
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 3
  episodes_total: 1103
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0013363461010158062
          cur_lr: 5.000000000000001e-05
          entropy: 1.5665979425112406
          entropy_coeff: 0.009999999999999998
          kl: 0.025348337537620443
          policy_loss: -0.12525448732905917
          total_loss: -0.12340998243954447
          vf_explained_var: 0.23257768154144287
          vf_loss: 0.017476608448972305
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 343000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,343,8162.72,343000,-3.0087,-2.34,-4.05,300.87


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-10-24_10-24-15
  done: false
  episode_len_mean: 302.78
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.0277999999999796
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 3
  episodes_total: 1106
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00200451915152371
          cur_lr: 5.000000000000001e-05
          entropy: 1.3805603656503889
          entropy_coeff: 0.009999999999999998
          kl: 0.06091581403259857
          policy_loss: -0.11598247182038095
          total_loss: -0.11401829967896143
          vf_explained_var: 0.34699127078056335
          vf_loss: 0.015647668795039257
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 344000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,344,8183.95,344000,-3.0278,-2.34,-4.05,302.78




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-10-24_10-24-54
  done: false
  episode_len_mean: 304.46
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.0445999999999795
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 4
  episodes_total: 1110
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0030067787272855633
          cur_lr: 5.000000000000001e-05
          entropy: 1.4720883144272698
          entropy_coeff: 0.009999999999999998
          kl: 0.006811514481350874
          policy_loss: 0.016649006803830465
          total_loss: 0.017328734281990263
          vf_explained_var: 0.2957550585269928
          vf_loss: 0.01538012744858861
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained: 345000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,345,8223.01,345000,-3.0446,-2.34,-4.05,304.46


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-10-24_10-25-20
  done: false
  episode_len_mean: 304.46
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.0445999999999787
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 3
  episodes_total: 1113
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0030067787272855633
          cur_lr: 5.000000000000001e-05
          entropy: 1.0764250808291964
          entropy_coeff: 0.009999999999999998
          kl: 0.016467692595304662
          policy_loss: -0.0926640117333995
          total_loss: -0.08938225913378928
          vf_explained_var: 0.2287052571773529
          vf_loss: 0.013996489076978631
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained: 346000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,346,8249,346000,-3.0446,-2.34,-4.05,304.46


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-10-24_10-25-45
  done: false
  episode_len_mean: 303.41
  episode_media: {}
  episode_reward_max: -2.339999999999994
  episode_reward_mean: -3.0340999999999787
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 4
  episodes_total: 1117
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0030067787272855633
          cur_lr: 5.000000000000001e-05
          entropy: 0.9640512122048273
          entropy_coeff: 0.009999999999999998
          kl: 0.0062926564411030644
          policy_loss: -0.02762243648370107
          total_loss: -0.021999779178036583
          vf_explained_var: 0.11327671259641647
          vf_loss: 0.015244250713537136
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained: 347

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,347,8274.03,347000,-3.0341,-2.34,-4.05,303.41


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-10-24_10-26-10
  done: false
  episode_len_mean: 303.64
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.0363999999999796
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 4
  episodes_total: 1121
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0030067787272855633
          cur_lr: 5.000000000000001e-05
          entropy: 0.8943458947870466
          entropy_coeff: 0.009999999999999998
          kl: 0.007734228683246553
          policy_loss: 0.02309383720987373
          total_loss: 0.027231112950377993
          vf_explained_var: 0.2410207986831665
          vf_loss: 0.013057478693210417
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_trained: 348000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,348,8298.82,348000,-3.0364,-2.45,-4.05,303.64


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-10-24_10-26-35
  done: false
  episode_len_mean: 303.45
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.0344999999999795
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 4
  episodes_total: 1125
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0030067787272855633
          cur_lr: 5.000000000000001e-05
          entropy: 1.1283861729833815
          entropy_coeff: 0.009999999999999998
          kl: 0.00648777474189205
          policy_loss: 0.0009210258722305298
          total_loss: 0.0009224475257926517
          vf_explained_var: 0.4282835125923157
          vf_loss: 0.01126577714458108
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_trained: 34900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,349,8323.73,349000,-3.0345,-2.45,-4.05,303.45


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-10-24_10-27-00
  done: false
  episode_len_mean: 303.35
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.0334999999999788
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 3
  episodes_total: 1128
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0030067787272855633
          cur_lr: 5.000000000000001e-05
          entropy: 1.1525980194409688
          entropy_coeff: 0.009999999999999998
          kl: 0.00661943989000946
          policy_loss: -0.08472971899641885
          total_loss: -0.0862397301528189
          vf_explained_var: 0.43453800678253174
          vf_loss: 0.009996070091923078
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 350000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,350,8348.62,350000,-3.0335,-2.45,-4.05,303.35


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-10-24_10-27-23
  done: false
  episode_len_mean: 303.1
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.030999999999978
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 4
  episodes_total: 1132
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0030067787272855633
          cur_lr: 5.000000000000001e-05
          entropy: 1.2941466291745505
          entropy_coeff: 0.009999999999999998
          kl: 0.016094043424451185
          policy_loss: 0.0038829803466796874
          total_loss: -0.0010198024411996207
          vf_explained_var: 0.6802845597267151
          vf_loss: 0.007990293023693893
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 3510

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,351,8371.65,351000,-3.031,-2.45,-4.05,303.1


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-10-24_10-27-46
  done: false
  episode_len_mean: 303.68
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.0367999999999786
  episode_reward_min: -4.049999999999958
  episodes_this_iter: 3
  episodes_total: 1135
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0030067787272855633
          cur_lr: 5.000000000000001e-05
          entropy: 1.1391815490192838
          entropy_coeff: 0.009999999999999998
          kl: 0.018872981255098332
          policy_loss: -0.07668863170676761
          total_loss: -0.0810183208849695
          vf_explained_var: 0.7939908504486084
          vf_loss: 0.007005379483517673
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained: 352000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,352,8394.94,352000,-3.0368,-2.45,-4.05,303.68


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-10-24_10-28-05
  done: false
  episode_len_mean: 307.72
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.077199999999978
  episode_reward_min: -4.999999999999938
  episodes_this_iter: 3
  episodes_total: 1138
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0030067787272855633
          cur_lr: 5.000000000000001e-05
          entropy: 2.2881443487273323
          entropy_coeff: 0.009999999999999998
          kl: 0.022606034436713194
          policy_loss: 0.0026759799155924055
          total_loss: -0.01218173743949996
          vf_explained_var: 0.3292110562324524
          vf_loss: 0.00795575343977867
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_trained: 353000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,353,8413.39,353000,-3.0772,-2.45,-5,307.72




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-10-24_10-28-40
  done: false
  episode_len_mean: 309.92
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.099199999999978
  episode_reward_min: -4.999999999999938
  episodes_this_iter: 2
  episodes_total: 1140
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.004510168090928346
          cur_lr: 5.000000000000001e-05
          entropy: 1.9364219056235419
          entropy_coeff: 0.009999999999999998
          kl: 0.0365048194381535
          policy_loss: -0.052829202223155236
          total_loss: -0.06536846334735552
          vf_explained_var: 0.7053176164627075
          vf_loss: 0.0066603186873382784
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 354000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,354,8448.65,354000,-3.0992,-2.45,-5,309.92


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-10-24_10-28-58
  done: false
  episode_len_mean: 315.47
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.1546999999999765
  episode_reward_min: -5.919999999999918
  episodes_this_iter: 2
  episodes_total: 1142
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006765252136392522
          cur_lr: 5.000000000000001e-05
          entropy: 2.29910847875807
          entropy_coeff: 0.009999999999999998
          kl: 0.015059669423518263
          policy_loss: -0.23217036674420038
          total_loss: -0.2467207959956593
          vf_explained_var: 0.3102521598339081
          vf_loss: 0.008338769567975154
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 355000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,355,8467.03,355000,-3.1547,-2.45,-5.92,315.47


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-10-24_10-29-16
  done: false
  episode_len_mean: 320.91
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.327899999999974
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 2
  episodes_total: 1144
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006765252136392522
          cur_lr: 5.000000000000001e-05
          entropy: 2.251120360692342
          entropy_coeff: 0.009999999999999998
          kl: 0.041186749121993986
          policy_loss: 0.07802250550852882
          total_loss: 0.11698386039998797
          vf_explained_var: -0.41683441400527954
          vf_loss: 0.061193921864873525
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 356000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,356,8484.89,356000,-3.3279,-2.45,-11.4,320.91


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-10-24_10-29-33
  done: false
  episode_len_mean: 323.89
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.3576999999999737
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 1
  episodes_total: 1145
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010147878204588776
          cur_lr: 5.000000000000001e-05
          entropy: 2.28265380859375
          entropy_coeff: 0.009999999999999998
          kl: 0.022898812668670387
          policy_loss: -0.2157151503695382
          total_loss: -0.22774095171027714
          vf_explained_var: 0.8711476922035217
          vf_loss: 0.010568369730996589
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trained: 357000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,357,8501.84,357000,-3.3577,-2.45,-11.4,323.89


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-10-24_10-29-51
  done: false
  episode_len_mean: 329.05
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.409299999999972
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 2
  episodes_total: 1147
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015221817306883167
          cur_lr: 5.000000000000001e-05
          entropy: 2.292079869906108
          entropy_coeff: 0.009999999999999998
          kl: 0.01942466064097163
          policy_loss: -0.054011364612314434
          total_loss: -0.05750845596194267
          vf_explained_var: 0.43328502774238586
          vf_loss: 0.019128023385484187
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_trained: 358000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,358,8519.27,358000,-3.4093,-2.45,-11.4,329.05


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-10-24_10-30-11
  done: false
  episode_len_mean: 334.61
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.4648999999999703
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1150
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.015221817306883167
          cur_lr: 5.000000000000001e-05
          entropy: 2.12266092300415
          entropy_coeff: 0.009999999999999998
          kl: 0.020501604753484424
          policy_loss: -0.017833680576748317
          total_loss: -0.019111416902807023
          vf_explained_var: 0.6432809829711914
          vf_loss: 0.01963680312037468
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained: 359000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,359,8539.09,359000,-3.4649,-2.45,-11.4,334.61


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-10-24_10-30-28
  done: false
  episode_len_mean: 338.99
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -3.5086999999999704
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 2
  episodes_total: 1152
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.9112542192141215
          entropy_coeff: 0.009999999999999998
          kl: 0.011737863442984173
          policy_loss: 0.08180246866411633
          total_loss: 0.07074563023116853
          vf_explained_var: 0.38212355971336365
          vf_loss: 0.007787695702831519
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,360,8556.86,360000,-3.5087,-2.45,-11.4,338.99


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-10-24_10-30-48
  done: false
  episode_len_mean: 343.28
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.551599999999971
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1155
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.8583545472886827
          entropy_coeff: 0.009999999999999998
          kl: 0.011043982204820483
          policy_loss: -0.057153527583513
          total_loss: -0.06275763743453555
          vf_explained_var: 0.650975227355957
          vf_loss: 0.01272727082315315
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trained: 361000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,361,8576.59,361000,-3.5516,-2.54,-11.4,343.28


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-10-24_10-31-09
  done: false
  episode_len_mean: 345.56
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.574399999999969
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 2
  episodes_total: 1157
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.76775787141588
          entropy_coeff: 0.009999999999999998
          kl: 0.008831418926323161
          policy_loss: -0.059604850825336245
          total_loss: -0.06255695985423194
          vf_explained_var: 0.5253308415412903
          vf_loss: 0.01452382160609381
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_trained: 362000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,362,8597.12,362000,-3.5744,-2.54,-11.4,345.56


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-10-24_10-31-29
  done: false
  episode_len_mean: 346.76
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.586399999999969
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1160
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.6249454763200548
          entropy_coeff: 0.009999999999999998
          kl: 0.008597235882537134
          policy_loss: -0.0954784746799204
          total_loss: -0.09486814878053135
          vf_explained_var: 0.4114640951156616
          vf_loss: 0.016663483964900177
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trained: 363000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,363,8617.45,363000,-3.5864,-2.54,-11.4,346.76


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-10-24_10-31-49
  done: false
  episode_len_mean: 349.35
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.6122999999999688
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1163
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.548232834868961
          entropy_coeff: 0.009999999999999998
          kl: 0.007539276008606559
          policy_loss: 0.061828930675983426
          total_loss: 0.05761768710282114
          vf_explained_var: 0.17105159163475037
          vf_loss: 0.011098941384504239
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_trained: 364000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,364,8637.56,364000,-3.6123,-2.54,-11.4,349.35


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-10-24_10-32-10
  done: false
  episode_len_mean: 350.0
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.618799999999969
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1166
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.4460579805903964
          entropy_coeff: 0.009999999999999998
          kl: 0.008578966971919568
          policy_loss: 0.009335400660832723
          total_loss: 0.007181955046123929
          vf_explained_var: 0.393452912569046
          vf_loss: 0.01211125442577112
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained: 365000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,365,8658.85,365000,-3.6188,-2.54,-11.4,350


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-10-24_10-32-30
  done: false
  episode_len_mean: 350.63
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.6250999999999687
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1169
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.4761518624093797
          entropy_coeff: 0.009999999999999998
          kl: 0.008818602813211503
          policy_loss: 0.05123998522758484
          total_loss: 0.048093357847796545
          vf_explained_var: 0.30586737394332886
          vf_loss: 0.01141353721678671
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_trained: 366000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,366,8678.77,366000,-3.6251,-2.54,-11.4,350.63




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-10-24_10-33-07
  done: false
  episode_len_mean: 350.74
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.626199999999969
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1172
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.5687852144241332
          entropy_coeff: 0.009999999999999998
          kl: 0.010858670649060227
          policy_loss: 0.06173736287487878
          total_loss: 0.05781555275122325
          vf_explained_var: 0.18979117274284363
          vf_loss: 0.011518108886149195
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trained: 367000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,367,8714.92,367000,-3.6262,-2.54,-11.4,350.74


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-10-24_10-33-27
  done: false
  episode_len_mean: 350.61
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.6248999999999683
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1175
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.6701716992590163
          entropy_coeff: 0.009999999999999998
          kl: 0.01567717003283185
          policy_loss: 0.05613201856613159
          total_loss: 0.051225509577327306
          vf_explained_var: -0.013083009980618954
          vf_loss: 0.011437256954377518
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 368000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,368,8735.17,368000,-3.6249,-2.54,-11.4,350.61


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-10-24_10-33-48
  done: false
  episode_len_mean: 349.85
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.617299999999969
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1178
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.5277404851383634
          entropy_coeff: 0.009999999999999998
          kl: 0.0060338944160186255
          policy_loss: 0.06845672296153174
          total_loss: 0.0648275117079417
          vf_explained_var: 0.1390380561351776
          vf_loss: 0.011510420121097317
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_trained: 369000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,369,8756.43,369000,-3.6173,-2.54,-11.4,349.85


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-10-24_10-34-08
  done: false
  episode_len_mean: 350.33
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.622099999999968
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1181
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.5493040641148885
          entropy_coeff: 0.009999999999999998
          kl: 0.012647258930729264
          policy_loss: 0.05061031720704502
          total_loss: 0.047704532245794934
          vf_explained_var: -0.25147882103919983
          vf_loss: 0.01229848012379888
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_trained: 370000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,370,8776.35,370000,-3.6221,-2.54,-11.4,350.33


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-10-24_10-34-30
  done: false
  episode_len_mean: 348.2
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.6007999999999685
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1184
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.4331888026661344
          entropy_coeff: 0.009999999999999998
          kl: 0.006506663359978325
          policy_loss: 0.049524897502528295
          total_loss: 0.046954257869058186
          vf_explained_var: 0.04616687074303627
          vf_loss: 0.011612681465016471
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trained: 371000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,371,8797.79,371000,-3.6008,-2.54,-11.4,348.2


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-10-24_10-34-50
  done: false
  episode_len_mean: 347.26
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.591399999999969
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1187
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.474637766679128
          entropy_coeff: 0.009999999999999998
          kl: 0.00828453920536227
          policy_loss: 0.01702123342288865
          total_loss: 0.015067121883233388
          vf_explained_var: 0.0936809554696083
          vf_loss: 0.01260310608583192
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_trained: 372000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,372,8818.43,372000,-3.5914,-2.54,-11.4,347.26


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-10-24_10-35-10
  done: false
  episode_len_mean: 346.22
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.5809999999999693
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1190
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.3546791315078734
          entropy_coeff: 0.009999999999999998
          kl: 0.011968499941419786
          policy_loss: 0.019316781643364166
          total_loss: 0.019376705669694478
          vf_explained_var: -0.019277093932032585
          vf_loss: 0.013333440767989183
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_trained: 37300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,373,8838.7,373000,-3.581,-2.54,-11.4,346.22


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-10-24_10-35-33
  done: false
  episode_len_mean: 345.16
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.570399999999969
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1193
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.2793922781944276
          entropy_coeff: 0.009999999999999998
          kl: 0.011152405046622663
          policy_loss: -0.10454105113943418
          total_loss: -0.10029058166676097
          vf_explained_var: 0.1009816825389862
          vf_loss: 0.016789753300448258
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained: 374000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,374,8860.92,374000,-3.5704,-2.54,-11.4,345.16


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-10-24_10-35-54
  done: false
  episode_len_mean: 343.58
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.554599999999971
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 4
  episodes_total: 1197
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.5335740592744616
          entropy_coeff: 0.009999999999999998
          kl: 0.009421716670915398
          policy_loss: -0.005725705375274022
          total_loss: -0.003269569410218133
          vf_explained_var: 0.10007870197296143
          vf_loss: 0.01757675034718381
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_trained: 375000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,375,8882.56,375000,-3.5546,-2.54,-11.4,343.58




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-10-24_10-36-34
  done: false
  episode_len_mean: 341.87
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.53749999999997
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1200
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.3157340897454155
          entropy_coeff: 0.009999999999999998
          kl: 0.008597456511154098
          policy_loss: 0.07914530734221141
          total_loss: 0.07374041146702237
          vf_explained_var: 0.26172247529029846
          vf_loss: 0.007556141548169156
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained: 376000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,376,8922.15,376000,-3.5375,-2.54,-11.4,341.87


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-10-24_10-36-59
  done: false
  episode_len_mean: 339.15
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.510299999999971
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 4
  episodes_total: 1204
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.1770325700441997
          entropy_coeff: 0.009999999999999998
          kl: 0.009486709422645213
          policy_loss: 0.0064751219004392626
          total_loss: 0.012822258182697825
          vf_explained_var: 0.05410384014248848
          vf_loss: 0.017900856460134187
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained: 377000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,377,8946.67,377000,-3.5103,-2.54,-11.4,339.15


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-10-24_10-37-21
  done: false
  episode_len_mean: 338.13
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.500099999999971
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1207
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.2412475943565369
          entropy_coeff: 0.009999999999999998
          kl: 0.010533478671599757
          policy_loss: 0.03345991919438044
          total_loss: 0.03357616803712315
          vf_explained_var: 0.021135399118065834
          vf_loss: 0.012288220676903923
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 378000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,378,8969.15,378000,-3.5001,-2.54,-11.4,338.13


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-10-24_10-37-45
  done: false
  episode_len_mean: 337.68
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.4955999999999716
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 4
  episodes_total: 1211
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.113202722205056
          entropy_coeff: 0.009999999999999998
          kl: 0.009012257304499135
          policy_loss: 0.001210806311832534
          total_loss: 0.00562921373380555
          vf_explained_var: 0.17117281258106232
          vf_loss: 0.015344662198589907
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_trained: 379000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,379,8993.35,379000,-3.4956,-2.54,-11.4,337.68


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-10-24_10-38-08
  done: false
  episode_len_mean: 338.11
  episode_media: {}
  episode_reward_max: -2.53999999999999
  episode_reward_mean: -3.4998999999999705
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1214
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.357342733277215
          entropy_coeff: 0.009999999999999998
          kl: 0.007830012121810902
          policy_loss: 0.04138098789585961
          total_loss: 0.036495355930593276
          vf_explained_var: 0.28518566489219666
          vf_loss: 0.008509016055743106
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_trained: 380000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,380,9016.2,380000,-3.4999,-2.54,-11.4,338.11


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-10-24_10-38-32
  done: false
  episode_len_mean: 338.92
  episode_media: {}
  episode_reward_max: -2.5499999999999896
  episode_reward_mean: -3.507999999999971
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 4
  episodes_total: 1218
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.2422680947515699
          entropy_coeff: 0.009999999999999998
          kl: 0.006346627390229307
          policy_loss: 0.02303129037221273
          total_loss: 0.024141193512413235
          vf_explained_var: 0.38744282722473145
          vf_loss: 0.0133876729135712
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_trained: 381000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,381,9040.14,381000,-3.508,-2.55,-11.4,338.92


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-10-24_10-38-56
  done: false
  episode_len_mean: 339.61
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.514899999999971
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1221
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.3148472706476848
          entropy_coeff: 0.009999999999999998
          kl: 0.010304873472559786
          policy_loss: 0.02972487070494228
          total_loss: 0.02390822395682335
          vf_explained_var: 0.6598025560379028
          vf_loss: 0.007096535936903415
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_trained: 382000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,382,9063.63,382000,-3.5149,-2.58,-11.4,339.61


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-10-24_10-39-18
  done: false
  episode_len_mean: 340.2
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.520799999999971
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1224
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.2326981796158685
          entropy_coeff: 0.009999999999999998
          kl: 0.00571344682034203
          policy_loss: -0.11557037565443251
          total_loss: -0.1199652729762925
          vf_explained_var: 0.7386038899421692
          vf_loss: 0.007801626027665204
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained: 383000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,383,9086.46,383000,-3.5208,-2.58,-11.4,340.2


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-10-24_10-39-41
  done: false
  episode_len_mean: 341.58
  episode_media: {}
  episode_reward_max: -2.579999999999989
  episode_reward_mean: -3.5345999999999695
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 4
  episodes_total: 1228
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.2252609180079566
          entropy_coeff: 0.009999999999999998
          kl: 0.011222031435229398
          policy_loss: 0.04036870565679338
          total_loss: 0.038070771015352674
          vf_explained_var: 0.6773551106452942
          vf_loss: 0.009698443896033698
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 384000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,384,9108.52,384000,-3.5346,-2.58,-11.4,341.58




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-10-24_10-40-23
  done: false
  episode_len_mean: 341.49
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.53369999999997
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1231
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.1860169245137109
          entropy_coeff: 0.009999999999999998
          kl: 0.008482191067091962
          policy_loss: -0.14559485846095616
          total_loss: -0.14807996534638934
          vf_explained_var: 0.6504514217376709
          vf_loss: 0.009181392668849892
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 385000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,385,9150.47,385000,-3.5337,-2.29,-11.4,341.49


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-10-24_10-40-46
  done: false
  episode_len_mean: 341.25
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.5312999999999697
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 4
  episodes_total: 1235
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.198632585340076
          entropy_coeff: 0.009999999999999998
          kl: 0.009560490422610791
          policy_loss: 0.003515670821070671
          total_loss: 0.005463684350252152
          vf_explained_var: 0.2960973381996155
          vf_loss: 0.013716048198855586
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_trained: 386000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,386,9173.85,386000,-3.5313,-2.29,-11.4,341.25


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-10-24_10-41-10
  done: false
  episode_len_mean: 336.99
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.4886999999999717
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 4
  episodes_total: 1239
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.0479785495334202
          entropy_coeff: 0.009999999999999998
          kl: 0.007520575822564446
          policy_loss: 0.06653613986240493
          total_loss: 0.0659932264023357
          vf_explained_var: 0.39495787024497986
          vf_loss: 0.009765155856601065
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained: 387000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,387,9198.28,387000,-3.4887,-2.29,-11.4,336.99


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-10-24_10-41-34
  done: false
  episode_len_mean: 329.4
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.4127999999999723
  episode_reward_min: -11.39999999999989
  episodes_this_iter: 3
  episodes_total: 1242
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.5538839419682822
          entropy_coeff: 0.009999999999999998
          kl: 0.014417134861107113
          policy_loss: 0.013062058885892232
          total_loss: 0.007429778741465674
          vf_explained_var: 0.2860516309738159
          vf_loss: 0.009577378928143945
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_trained: 388000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,388,9221.56,388000,-3.4128,-2.29,-11.4,329.4


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-10-24_10-41-56
  done: false
  episode_len_mean: 319.45
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.1944999999999752
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 4
  episodes_total: 1246
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.5623878147866992
          entropy_coeff: 0.009999999999999998
          kl: 0.009887298067677679
          policy_loss: 0.03209809685746829
          total_loss: 0.028902112940947213
          vf_explained_var: 0.39261001348495483
          vf_loss: 0.012202142520497243
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_trained: 389000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,389,9244.28,389000,-3.1945,-2.29,-5.5,319.45


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-10-24_10-42-21
  done: false
  episode_len_mean: 312.96
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.129599999999977
  episode_reward_min: -5.499999999999927
  episodes_this_iter: 3
  episodes_total: 1249
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.2745037900076972
          entropy_coeff: 0.009999999999999998
          kl: 0.00507316433683244
          policy_loss: 0.02194795235991478
          total_loss: 0.018044008480177984
          vf_explained_var: 0.47242382168769836
          vf_loss: 0.00872525899645148
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_trained: 390000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,390,9268.82,390000,-3.1296,-2.29,-5.5,312.96


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-10-24_10-42-43
  done: false
  episode_len_mean: 308.06
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.080599999999978
  episode_reward_min: -4.869999999999941
  episodes_this_iter: 4
  episodes_total: 1253
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.4201789816220602
          entropy_coeff: 0.009999999999999998
          kl: 0.010338987450693003
          policy_loss: -0.019576314008898206
          total_loss: -0.019273383584287433
          vf_explained_var: 0.331036239862442
          vf_loss: 0.014268652546323007
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_trained: 391000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,391,9291.06,391000,-3.0806,-2.29,-4.87,308.06


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-10-24_10-43-07
  done: false
  episode_len_mean: 303.79
  episode_media: {}
  episode_reward_max: -2.289999999999995
  episode_reward_mean: -3.0378999999999787
  episode_reward_min: -3.699999999999965
  episodes_this_iter: 3
  episodes_total: 1256
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 1.0623759388923646
          entropy_coeff: 0.009999999999999998
          kl: 0.008898711544934202
          policy_loss: -0.048281978526049193
          total_loss: -0.048395389152897726
          vf_explained_var: 0.5037849545478821
          vf_loss: 0.010307170347207122
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_trained: 39200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,392,9315.27,392000,-3.0379,-2.29,-3.7,303.79




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-10-24_10-43-50
  done: false
  episode_len_mean: 300.32
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -3.00319999999998
  episode_reward_min: -3.699999999999965
  episodes_this_iter: 4
  episodes_total: 1260
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.022832725960324762
          cur_lr: 5.000000000000001e-05
          entropy: 0.9421282907327017
          entropy_coeff: 0.009999999999999998
          kl: 0.004905544235713756
          policy_loss: -0.02594340153866344
          total_loss: -0.024194377660751342
          vf_explained_var: 0.4233262240886688
          vf_loss: 0.01105829994711611
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_trained: 393000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,393,9357.84,393000,-3.0032,-2.21,-3.7,300.32


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-10-24_10-44-17
  done: false
  episode_len_mean: 296.5
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.9649999999999808
  episode_reward_min: -3.659999999999966
  episodes_this_iter: 4
  episodes_total: 1264
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.011416362980162381
          cur_lr: 5.000000000000001e-05
          entropy: 0.9082021547688378
          entropy_coeff: 0.009999999999999998
          kl: 0.00336151984793776
          policy_loss: -0.039151544206672245
          total_loss: -0.03658765595820215
          vf_explained_var: 0.38924577832221985
          vf_loss: 0.011607533486353027
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trained: 394000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,394,9385.29,394000,-2.965,-2.21,-3.66,296.5


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-10-24_10-44-41
  done: false
  episode_len_mean: 293.71
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.937099999999981
  episode_reward_min: -3.659999999999966
  episodes_this_iter: 4
  episodes_total: 1268
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 0.9980975084834629
          entropy_coeff: 0.009999999999999998
          kl: 0.011451098553479689
          policy_loss: 0.03555857166647911
          total_loss: 0.03753305930230352
          vf_explained_var: 0.3960656523704529
          vf_loss: 0.01189010012894869
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_trained: 395000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,395,9409.02,395000,-2.9371,-2.21,-3.66,293.71


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-10-24_10-45-08
  done: false
  episode_len_mean: 289.81
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.8980999999999826
  episode_reward_min: -3.659999999999966
  episodes_this_iter: 4
  episodes_total: 1272
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 0.7524757570690579
          entropy_coeff: 0.009999999999999998
          kl: 0.014033644733464514
          policy_loss: 0.03570864109529389
          total_loss: 0.0393332721458541
          vf_explained_var: 0.40345221757888794
          vf_loss: 0.011069284317394098
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 396000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,396,9436.01,396000,-2.8981,-2.21,-3.66,289.81


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-10-24_10-45-32
  done: false
  episode_len_mean: 287.33
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.8732999999999835
  episode_reward_min: -3.659999999999966
  episodes_this_iter: 4
  episodes_total: 1276
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 0.9557187729411655
          entropy_coeff: 0.009999999999999998
          kl: 0.012752629020399158
          policy_loss: 0.04535578162305885
          total_loss: 0.053272219250599544
          vf_explained_var: 0.1033080443739891
          vf_loss: 0.017400829390519196
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_trained: 397000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,397,9460.12,397000,-2.8733,-2.21,-3.66,287.33


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-10-24_10-45-59
  done: false
  episode_len_mean: 283.68
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.836799999999984
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 1280
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 0.6357402834627364
          entropy_coeff: 0.009999999999999998
          kl: 0.005119095726114489
          policy_loss: 0.018175813721285926
          total_loss: 0.023778522594107523
          vf_explained_var: 0.2870316207408905
          vf_loss: 0.011930893299480279
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trained: 398000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,398,9487.15,398000,-2.8368,-2.21,-3.59,283.68


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-10-24_10-46-25
  done: false
  episode_len_mean: 281.42
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.8141999999999836
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 1284
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 0.9551870286464691
          entropy_coeff: 0.009999999999999998
          kl: 0.006942661850761075
          policy_loss: 0.0171478853871425
          total_loss: 0.01965763635105557
          vf_explained_var: 0.2549528479576111
          vf_loss: 0.012021994249274333
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trained: 399000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,399,9512.4,399000,-2.8142,-2.21,-3.59,281.42


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-10-24_10-46-49
  done: false
  episode_len_mean: 279.73
  episode_media: {}
  episode_reward_max: -2.209999999999997
  episode_reward_mean: -2.797299999999985
  episode_reward_min: -3.519999999999969
  episodes_this_iter: 3
  episodes_total: 1287
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 1.0239967829651302
          entropy_coeff: 0.009999999999999998
          kl: 0.005546492264605263
          policy_loss: -0.04009590182039473
          total_loss: -0.04044719744059774
          vf_explained_var: 0.4420629143714905
          vf_loss: 0.009857007896708738
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trained: 400000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,400,9536.98,400000,-2.7973,-2.21,-3.52,279.73




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-10-24_10-47-33
  done: false
  episode_len_mean: 275.93
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.759299999999984
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1291
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 0.7705541524622176
          entropy_coeff: 0.009999999999999998
          kl: 0.005829027665943916
          policy_loss: -0.09693006575107574
          total_loss: -0.08624402284622193
          vf_explained_var: 0.11370327323675156
          vf_loss: 0.018358306545350287
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained: 40100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,401,9580.58,401000,-2.7593,-2.09,-3.35,275.93


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-10-24_10-47-56
  done: false
  episode_len_mean: 275.05
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.7504999999999855
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1295
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 1.1852373785442776
          entropy_coeff: 0.009999999999999998
          kl: 0.013742206835495808
          policy_loss: 0.008966321332587136
          total_loss: 0.008009208738803864
          vf_explained_var: 0.5413329005241394
          vf_loss: 0.010816816995955176
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trained: 40200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,402,9604.02,402000,-2.7505,-2.09,-3.35,275.05


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-10-24_10-48-20
  done: false
  episode_len_mean: 274.07
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.7406999999999844
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 3
  episodes_total: 1298
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 1.0762152598963843
          entropy_coeff: 0.009999999999999998
          kl: 0.018831214560424157
          policy_loss: -0.08502395794623428
          total_loss: -0.08180487847162618
          vf_explained_var: 0.3683890402317047
          vf_loss: 0.013873740006238222
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained: 40300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,403,9627.52,403000,-2.7407,-2.09,-3.35,274.07


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-10-24_10-48-44
  done: false
  episode_len_mean: 274.04
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.740399999999985
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1302
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 0.8489760955174764
          entropy_coeff: 0.009999999999999998
          kl: 0.006955996746215042
          policy_loss: 0.034265971183776854
          total_loss: 0.03645069640543726
          vf_explained_var: 0.33078083395957947
          vf_loss: 0.010634781875544124
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_trained: 404000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,404,9651.35,404000,-2.7404,-2.09,-3.35,274.04


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-10-24_10-49-07
  done: false
  episode_len_mean: 274.15
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.741499999999985
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1306
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 0.9307263996866014
          entropy_coeff: 0.009999999999999998
          kl: 0.01249938685624667
          policy_loss: -0.015728512530525526
          total_loss: -0.01014219613538848
          vf_explained_var: 0.11661358922719955
          vf_loss: 0.01482223230931494
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,405,9674.66,405000,-2.7415,-2.09,-3.35,274.15


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-10-24_10-49-33
  done: false
  episode_len_mean: 272.65
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.7264999999999855
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1310
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 0.755102362897661
          entropy_coeff: 0.009999999999999998
          kl: 0.00849626818766913
          policy_loss: 0.005668088379833434
          total_loss: 0.011618792389829954
          vf_explained_var: 0.15540118515491486
          vf_loss: 0.013453228730294439
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 406000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,406,9700.68,406000,-2.7265,-2.09,-3.35,272.65


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-10-24_10-49-57
  done: false
  episode_len_mean: 272.71
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.727099999999985
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 3
  episodes_total: 1313
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 1.0053657750288645
          entropy_coeff: 0.009999999999999998
          kl: 0.013627373198473415
          policy_loss: 0.02108829559551345
          total_loss: 0.020985168798102274
          vf_explained_var: 0.17422305047512054
          vf_loss: 0.009872741360838214
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 407000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,407,9724.19,407000,-2.7271,-2.09,-3.35,272.71


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-10-24_10-50-20
  done: false
  episode_len_mean: 272.59
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.725899999999987
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1317
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 1.1828201439645556
          entropy_coeff: 0.009999999999999998
          kl: 0.011919181869867372
          policy_loss: 0.027631825953722
          total_loss: 0.026877569158871968
          vf_explained_var: 0.2822227478027344
          vf_loss: 0.011005907019393312
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 408000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,408,9747.59,408000,-2.7259,-2.09,-3.35,272.59




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-10-24_10-51-04
  done: false
  episode_len_mean: 271.36
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.713599999999986
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1321
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 0.9292523211903042
          entropy_coeff: 0.009999999999999998
          kl: 0.014857356896035531
          policy_loss: -0.0026714006231890785
          total_loss: 0.00026478411422835454
          vf_explained_var: 0.35162970423698425
          vf_loss: 0.012143901021530231
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,409,9791.32,409000,-2.7136,-2.09,-3.35,271.36


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-10-24_10-51-28
  done: false
  episode_len_mean: 270.78
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.707799999999986
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 3
  episodes_total: 1324
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005708181490081191
          cur_lr: 5.000000000000001e-05
          entropy: 1.055629732211431
          entropy_coeff: 0.009999999999999998
          kl: 0.0048593215059183435
          policy_loss: -0.07857259958982468
          total_loss: -0.0761948849591944
          vf_explained_var: 0.2956690490245819
          vf_loss: 0.012906275720645984
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 410000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,410,9815.83,410000,-2.7078,-2.09,-3.35,270.78


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-10-24_10-51-53
  done: false
  episode_len_mean: 269.61
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.6960999999999866
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1328
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0028540907450405953
          cur_lr: 5.000000000000001e-05
          entropy: 1.2595676316155329
          entropy_coeff: 0.009999999999999998
          kl: 0.020973426050827883
          policy_loss: 0.04815684403810236
          total_loss: 0.045618415416942705
          vf_explained_var: 0.42378950119018555
          vf_loss: 0.009997387752971716
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 4110

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,411,9840.06,411000,-2.6961,-2.09,-3.35,269.61


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-10-24_10-52-17
  done: false
  episode_len_mean: 269.79
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.697899999999986
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1332
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0042811361175608895
          cur_lr: 5.000000000000001e-05
          entropy: 1.4079207075966729
          entropy_coeff: 0.009999999999999998
          kl: 0.0202225325389468
          policy_loss: 0.005427876611550649
          total_loss: 0.00431355159315798
          vf_explained_var: 0.25132590532302856
          vf_loss: 0.012878306349739433
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 412000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,412,9864.51,412000,-2.6979,-2.09,-3.35,269.79


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-10-24_10-52-39
  done: false
  episode_len_mean: 270.4
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.703999999999986
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 3
  episodes_total: 1335
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 1.3330050044589572
          entropy_coeff: 0.009999999999999998
          kl: 0.014344779870698323
          policy_loss: 0.05476146737734477
          total_loss: 0.050749592648612125
          vf_explained_var: 0.2618948519229889
          vf_loss: 0.009226054593455046
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,413,9886.59,413000,-2.704,-2.09,-3.35,270.4


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-10-24_10-53-04
  done: false
  episode_len_mean: 270.17
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.7016999999999864
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1339
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 1.2078090647856394
          entropy_coeff: 0.009999999999999998
          kl: 0.0073191113138562575
          policy_loss: 0.004955859772033161
          total_loss: 0.0060559951596789886
          vf_explained_var: 0.22718358039855957
          vf_loss: 0.013131227209750148
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 41

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,414,9911.68,414000,-2.7017,-2.09,-3.35,270.17


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-10-24_10-53-28
  done: false
  episode_len_mean: 269.83
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.698299999999987
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 3
  episodes_total: 1342
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 1.2651990208360884
          entropy_coeff: 0.009999999999999998
          kl: 0.005239903858538695
          policy_loss: 0.002348802155918545
          total_loss: -0.001782724509636561
          vf_explained_var: 0.4236849844455719
          vf_loss: 0.008486815399697258
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 41500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,415,9935.19,415000,-2.6983,-2.09,-3.35,269.83


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-10-24_10-53-53
  done: false
  episode_len_mean: 269.31
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.6930999999999856
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1346
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 1.2461420178413392
          entropy_coeff: 0.009999999999999998
          kl: 0.008709725542860584
          policy_loss: -0.008678105970223745
          total_loss: -0.007849818136956956
          vf_explained_var: 0.3005574941635132
          vf_loss: 0.013233775821410948
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 416

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,416,9960.21,416000,-2.6931,-2.09,-3.35,269.31




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-10-24_10-54-34
  done: false
  episode_len_mean: 269.07
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.6906999999999868
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1350
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 1.5042781286769442
          entropy_coeff: 0.009999999999999998
          kl: 0.014051988935631322
          policy_loss: -0.021880722625388038
          total_loss: -0.02497182654009925
          vf_explained_var: 0.4688500165939331
          vf_loss: 0.011861439576993387
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_trained: 4170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,417,10001,417000,-2.6907,-2.09,-3.35,269.07


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-10-24_10-55-01
  done: false
  episode_len_mean: 268.21
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.682099999999987
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 3
  episodes_total: 1353
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 1.1865268561575149
          entropy_coeff: 0.009999999999999998
          kl: 0.009643996017174474
          policy_loss: -0.09903755953742398
          total_loss: -0.10214569779733816
          vf_explained_var: 0.5368359088897705
          vf_loss: 0.008695199599282609
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trained: 418000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,418,10028,418000,-2.6821,-2.09,-3.35,268.21


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-10-24_10-55-27
  done: false
  episode_len_mean: 267.14
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.671399999999987
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1357
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 0.9693424827522702
          entropy_coeff: 0.009999999999999998
          kl: 0.008296236714194234
          policy_loss: -0.09905553687777784
          total_loss: -0.09693645276129245
          vf_explained_var: 0.4758948087692261
          vf_loss: 0.011759232139835755
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_trained: 419000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,419,10054.1,419000,-2.6714,-2.09,-3.35,267.14


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-10-24_10-55-52
  done: false
  episode_len_mean: 267.74
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.677399999999987
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1361
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 1.2644075353940327
          entropy_coeff: 0.009999999999999998
          kl: 0.007012907579932994
          policy_loss: -0.045097239232725565
          total_loss: -0.04671759481231372
          vf_explained_var: 0.32326653599739075
          vf_loss: 0.010978687740862369
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_trained: 4200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,420,10079.2,420000,-2.6774,-2.09,-3.35,267.74


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-10-24_10-56-16
  done: false
  episode_len_mean: 268.4
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.6839999999999877
  episode_reward_min: -3.3499999999999726
  episodes_this_iter: 4
  episodes_total: 1365
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 1.310012854470147
          entropy_coeff: 0.009999999999999998
          kl: 0.009516669091944902
          policy_loss: 0.015850423359208637
          total_loss: 0.017231497334109412
          vf_explained_var: 0.05003175884485245
          vf_loss: 0.014420085990180572
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_trained: 421000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,421,10103.1,421000,-2.684,-2.09,-3.35,268.4


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-10-24_10-56-41
  done: false
  episode_len_mean: 267.87
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.6786999999999863
  episode_reward_min: -3.239999999999975
  episodes_this_iter: 4
  episodes_total: 1369
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 1.1374890844027201
          entropy_coeff: 0.009999999999999998
          kl: 0.0052859463216871445
          policy_loss: 0.005059623759653833
          total_loss: 0.005407387597693337
          vf_explained_var: 0.22983907163143158
          vf_loss: 0.011688710428360435
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained: 4220

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,422,10127.9,422000,-2.6787,-2.09,-3.24,267.87


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-10-24_10-57-05
  done: false
  episode_len_mean: 269.1
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.6909999999999865
  episode_reward_min: -3.239999999999975
  episodes_this_iter: 3
  episodes_total: 1372
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 1.1733727998203702
          entropy_coeff: 0.009999999999999998
          kl: 0.010060059003266985
          policy_loss: -0.02550011765625742
          total_loss: -0.030438835836119123
          vf_explained_var: 0.3356328010559082
          vf_loss: 0.006730403800288008
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_trained: 423000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,423,10152.7,423000,-2.691,-2.09,-3.24,269.1


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-10-24_10-57-29
  done: false
  episode_len_mean: 269.1
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.690999999999986
  episode_reward_min: -3.239999999999975
  episodes_this_iter: 4
  episodes_total: 1376
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 1.0422648045751783
          entropy_coeff: 0.009999999999999998
          kl: 0.013097119309124829
          policy_loss: 0.03291356505619155
          total_loss: 0.03601310526331266
          vf_explained_var: 0.2705267667770386
          vf_loss: 0.01343808039608929
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 424000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,424,10176,424000,-2.691,-2.09,-3.24,269.1




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-10-24_10-58-12
  done: false
  episode_len_mean: 269.39
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.693899999999987
  episode_reward_min: -3.239999999999975
  episodes_this_iter: 4
  episodes_total: 1380
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 0.9361915462546878
          entropy_coeff: 0.009999999999999998
          kl: 0.008784126088842785
          policy_loss: 0.020153066598706776
          total_loss: 0.024061126841439143
          vf_explained_var: 0.21376043558120728
          vf_loss: 0.013213565645532475
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_trained: 425000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,425,10219,425000,-2.6939,-2.09,-3.24,269.39


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-10-24_10-58-38
  done: false
  episode_len_mean: 269.25
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.692499999999987
  episode_reward_min: -3.239999999999975
  episodes_this_iter: 4
  episodes_total: 1384
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 1.0720935185750327
          entropy_coeff: 0.009999999999999998
          kl: 0.009229668912337551
          policy_loss: 0.029055463605456883
          total_loss: 0.03339365306827757
          vf_explained_var: 0.17795021831989288
          vf_loss: 0.014999854440490405
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 426000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,426,10245.5,426000,-2.6925,-2.09,-3.24,269.25


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-10-24_10-59-04
  done: false
  episode_len_mean: 268.71
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.6870999999999863
  episode_reward_min: -3.239999999999975
  episodes_this_iter: 4
  episodes_total: 1388
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 0.963676451974445
          entropy_coeff: 0.009999999999999998
          kl: 0.009499573320220042
          policy_loss: 0.0035381831228733064
          total_loss: 0.008137757082780202
          vf_explained_var: 0.25611960887908936
          vf_loss: 0.014175336569961575
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 42700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,427,10271.4,427000,-2.6871,-2.09,-3.24,268.71


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-10-24_10-59-30
  done: false
  episode_len_mean: 269.09
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6908999999999863
  episode_reward_min: -3.239999999999975
  episodes_this_iter: 4
  episodes_total: 1392
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 0.9593803677293989
          entropy_coeff: 0.009999999999999998
          kl: 0.01586974683723123
          policy_loss: 0.0017647960119777256
          total_loss: 0.0057004724939664205
          vf_explained_var: 0.28173673152923584
          vf_loss: 0.013427567513038715
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 4280

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,428,10297.1,428000,-2.6909,-2.18,-3.24,269.09


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-10-24_10-59-55
  done: false
  episode_len_mean: 268.67
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6866999999999863
  episode_reward_min: -3.239999999999975
  episodes_this_iter: 3
  episodes_total: 1395
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.006421704176341336
          cur_lr: 5.000000000000001e-05
          entropy: 1.181883015235265
          entropy_coeff: 0.009999999999999998
          kl: 0.020616875779001495
          policy_loss: -0.09948134678933356
          total_loss: -0.09623530200786061
          vf_explained_var: 0.25823143124580383
          vf_loss: 0.01493247886084848
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained: 429000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,429,10322,429000,-2.6867,-2.18,-3.24,268.67


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-10-24_11-00-22
  done: false
  episode_len_mean: 266.96
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.669599999999987
  episode_reward_min: -3.239999999999975
  episodes_this_iter: 5
  episodes_total: 1400
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009632556264512006
          cur_lr: 5.000000000000001e-05
          entropy: 1.0353571626875135
          entropy_coeff: 0.009999999999999998
          kl: 0.009420777342105068
          policy_loss: -0.01338161937892437
          total_loss: -0.006954698719912105
          vf_explained_var: 0.17235437035560608
          vf_loss: 0.01668974603008893
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 430000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,430,10348.6,430000,-2.6696,-2.18,-3.24,266.96


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-10-24_11-00-46
  done: false
  episode_len_mean: 266.48
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.664799999999987
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 3
  episodes_total: 1403
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009632556264512006
          cur_lr: 5.000000000000001e-05
          entropy: 1.2009052581257291
          entropy_coeff: 0.009999999999999998
          kl: 0.010117729025984406
          policy_loss: 0.030436041785611045
          total_loss: 0.028332024812698364
          vf_explained_var: 0.32355421781539917
          vf_loss: 0.009807574257461562
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_trained: 43100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,431,10373.2,431000,-2.6648,-2.18,-3.13,266.48


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-10-24_11-01-10
  done: false
  episode_len_mean: 267.29
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.672899999999987
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1407
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009632556264512006
          cur_lr: 5.000000000000001e-05
          entropy: 1.2657782753308615
          entropy_coeff: 0.009999999999999998
          kl: 0.009293251195437453
          policy_loss: 0.01249583868516816
          total_loss: 0.01507862044705285
          vf_explained_var: 0.23422150313854218
          vf_loss: 0.01515104521272911
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_trained: 432000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,432,10396.8,432000,-2.6729,-2.18,-3.13,267.29




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-10-24_11-01-53
  done: false
  episode_len_mean: 267.1
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6709999999999865
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1411
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009632556264512006
          cur_lr: 5.000000000000001e-05
          entropy: 1.0581918060779572
          entropy_coeff: 0.009999999999999998
          kl: 0.006341261601736884
          policy_loss: -0.005738539662626055
          total_loss: -0.0027822504027022257
          vf_explained_var: 0.33525604009628296
          vf_loss: 0.013477125401712127
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 43

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,433,10439.8,433000,-2.671,-2.18,-3.13,267.1


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-10-24_11-02-19
  done: false
  episode_len_mean: 265.63
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.656299999999987
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1415
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009632556264512006
          cur_lr: 5.000000000000001e-05
          entropy: 1.0055079367425708
          entropy_coeff: 0.009999999999999998
          kl: 0.005401467146768053
          policy_loss: 0.00840427064233356
          total_loss: 0.01191828010810746
          vf_explained_var: 0.24538210034370422
          vf_loss: 0.013517062345312702
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained: 434000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,434,10466.2,434000,-2.6563,-2.18,-3.13,265.63


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-10-24_11-02-44
  done: false
  episode_len_mean: 265.32
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6531999999999876
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 3
  episodes_total: 1418
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009632556264512006
          cur_lr: 5.000000000000001e-05
          entropy: 1.104421619574229
          entropy_coeff: 0.009999999999999998
          kl: 0.006422603557109636
          policy_loss: -0.07563715875148773
          total_loss: -0.07485998906195164
          vf_explained_var: 0.3094368278980255
          vf_loss: 0.011759519049276908
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_trained: 435000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,435,10491.2,435000,-2.6532,-2.18,-3.13,265.32


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-10-24_11-03-10
  done: false
  episode_len_mean: 265.3
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6529999999999876
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1422
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009632556264512006
          cur_lr: 5.000000000000001e-05
          entropy: 1.0128214657306671
          entropy_coeff: 0.009999999999999998
          kl: 0.005628751103272853
          policy_loss: -0.0313149774654044
          total_loss: -0.028717990302377278
          vf_explained_var: 0.3463852107524872
          vf_loss: 0.012670982784281174
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained: 436000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,436,10517.4,436000,-2.653,-2.18,-3.13,265.3


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-10-24_11-03-36
  done: false
  episode_len_mean: 264.53
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.6452999999999873
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1426
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009632556264512006
          cur_lr: 5.000000000000001e-05
          entropy: 0.9421915451685587
          entropy_coeff: 0.009999999999999998
          kl: 0.005714780781108825
          policy_loss: -0.055938495364454055
          total_loss: -0.05239166220029195
          vf_explained_var: 0.3740311861038208
          vf_loss: 0.012913703587320116
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 4370

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,437,10543.1,437000,-2.6453,-2.18,-3.13,264.53


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 438000
  custom_metrics: {}
  date: 2021-10-24_11-04-03
  done: false
  episode_len_mean: 263.9
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.638999999999988
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1430
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009632556264512006
          cur_lr: 5.000000000000001e-05
          entropy: 0.8271106600761413
          entropy_coeff: 0.009999999999999998
          kl: 0.007752968425970409
          policy_loss: -0.024316264357831744
          total_loss: -0.02016445476975706
          vf_explained_var: 0.3629644513130188
          vf_loss: 0.012348236629946364
    num_agent_steps_sampled: 438000
    num_agent_steps_trained: 438000
    num_steps_sampled: 438000
    num_steps_trained: 438000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,438,10569.9,438000,-2.639,-2.18,-3.13,263.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 439000
  custom_metrics: {}
  date: 2021-10-24_11-04-30
  done: false
  episode_len_mean: 261.31
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.613099999999988
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 5
  episodes_total: 1435
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009632556264512006
          cur_lr: 5.000000000000001e-05
          entropy: 0.8384015652868483
          entropy_coeff: 0.009999999999999998
          kl: 0.007663769204943015
          policy_loss: -0.03618830218911171
          total_loss: -0.029551327642467286
          vf_explained_var: 0.3509426712989807
          vf_loss: 0.014947170195066266
    num_agent_steps_sampled: 439000
    num_agent_steps_trained: 439000
    num_steps_sampled: 439000
    num_steps_trained: 43900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,439,10597.1,439000,-2.6131,-2.18,-3.13,261.31


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 440000
  custom_metrics: {}
  date: 2021-10-24_11-04-56
  done: false
  episode_len_mean: 260.82
  episode_media: {}
  episode_reward_max: -2.1799999999999975
  episode_reward_mean: -2.608199999999988
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 3
  episodes_total: 1438
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009632556264512006
          cur_lr: 5.000000000000001e-05
          entropy: 0.9258170518610213
          entropy_coeff: 0.009999999999999998
          kl: 0.039827111434905284
          policy_loss: -0.02095820465021663
          total_loss: -0.021589102264907626
          vf_explained_var: 0.49105212092399597
          vf_loss: 0.008243637149118715
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_steps_sampled: 440000
    num_steps_trained: 4400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,440,10622.9,440000,-2.6082,-2.18,-3.13,260.82




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 441000
  custom_metrics: {}
  date: 2021-10-24_11-05-41
  done: false
  episode_len_mean: 258.76
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5875999999999886
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 5
  episodes_total: 1443
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014448834396768001
          cur_lr: 5.000000000000001e-05
          entropy: 0.6832957082324558
          entropy_coeff: 0.009999999999999998
          kl: 0.00742496445228961
          policy_loss: -0.031557680004172854
          total_loss: -0.025018209922644828
          vf_explained_var: 0.41583681106567383
          vf_loss: 0.013265146118485265
    num_agent_steps_sampled: 441000
    num_agent_steps_trained: 441000
    num_steps_sampled: 441000
    num_steps_trained: 441000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,441,10667.8,441000,-2.5876,-2.07,-3.13,258.76


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 442000
  custom_metrics: {}
  date: 2021-10-24_11-06-07
  done: false
  episode_len_mean: 258.31
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.583099999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1447
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014448834396768001
          cur_lr: 5.000000000000001e-05
          entropy: 0.7829208407137129
          entropy_coeff: 0.009999999999999998
          kl: 0.015485314853629372
          policy_loss: -0.028226326985491646
          total_loss: -0.025214219838380812
          vf_explained_var: 0.24869593977928162
          vf_loss: 0.010617573554110195
    num_agent_steps_sampled: 442000
    num_agent_steps_trained: 442000
    num_steps_sampled: 442000
    num_steps_trained: 442000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,442,10694.1,442000,-2.5831,-2.07,-3.13,258.31


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 443000
  custom_metrics: {}
  date: 2021-10-24_11-06-33
  done: false
  episode_len_mean: 257.15
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5714999999999884
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1451
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014448834396768001
          cur_lr: 5.000000000000001e-05
          entropy: 0.7464255538251665
          entropy_coeff: 0.009999999999999998
          kl: 0.006303346220536494
          policy_loss: 0.020414423859781688
          total_loss: 0.024505347427394655
          vf_explained_var: 0.19436033070087433
          vf_loss: 0.011464105194641484
    num_agent_steps_sampled: 443000
    num_agent_steps_trained: 443000
    num_steps_sampled: 443000
    num_steps_trained: 443000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,443,10720.2,443000,-2.5715,-2.07,-3.13,257.15


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 444000
  custom_metrics: {}
  date: 2021-10-24_11-06-58
  done: false
  episode_len_mean: 257.26
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.572599999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 3
  episodes_total: 1454
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.014448834396768001
          cur_lr: 5.000000000000001e-05
          entropy: 0.7898966279294756
          entropy_coeff: 0.009999999999999998
          kl: 0.004689969934914097
          policy_loss: -0.09804830518033769
          total_loss: -0.09435506181584465
          vf_explained_var: 0.1690034717321396
          vf_loss: 0.011524442045225038
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_steps_sampled: 444000
    num_steps_trained: 444000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,444,10744.9,444000,-2.5726,-2.07,-3.13,257.26


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 445000
  custom_metrics: {}
  date: 2021-10-24_11-07-23
  done: false
  episode_len_mean: 258.03
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5802999999999887
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1458
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0072244171983840005
          cur_lr: 5.000000000000001e-05
          entropy: 0.8293249547481537
          entropy_coeff: 0.009999999999999998
          kl: 0.008531553176302964
          policy_loss: 0.007500806947549185
          total_loss: 0.009745542539490594
          vf_explained_var: 0.2903570532798767
          vf_loss: 0.010476350722213586
    num_agent_steps_sampled: 445000
    num_agent_steps_trained: 445000
    num_steps_sampled: 445000
    num_steps_trained: 445000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,445,10769.4,445000,-2.5803,-2.07,-3.13,258.03


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 446000
  custom_metrics: {}
  date: 2021-10-24_11-07-48
  done: false
  episode_len_mean: 258.17
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.581699999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1462
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0072244171983840005
          cur_lr: 5.000000000000001e-05
          entropy: 0.7339989099237654
          entropy_coeff: 0.009999999999999998
          kl: 0.005715797298457264
          policy_loss: 0.022842247949706185
          total_loss: 0.027088950408829582
          vf_explained_var: 0.2831706702709198
          vf_loss: 0.011545397186030945
    num_agent_steps_sampled: 446000
    num_agent_steps_trained: 446000
    num_steps_sampled: 446000
    num_steps_trained: 446000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,446,10794.6,446000,-2.5817,-2.07,-3.13,258.17


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 447000
  custom_metrics: {}
  date: 2021-10-24_11-08-13
  done: false
  episode_len_mean: 257.87
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.578699999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1466
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0072244171983840005
          cur_lr: 5.000000000000001e-05
          entropy: 0.6856080998977025
          entropy_coeff: 0.009999999999999998
          kl: 0.007472757629937219
          policy_loss: 0.023116258283456165
          total_loss: 0.027861135204633077
          vf_explained_var: 0.28443029522895813
          vf_loss: 0.011546974153154426
    num_agent_steps_sampled: 447000
    num_agent_steps_trained: 447000
    num_steps_sampled: 447000
    num_steps_trained: 447000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,447,10820.2,447000,-2.5787,-2.07,-3.13,257.87




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 448000
  custom_metrics: {}
  date: 2021-10-24_11-08-55
  done: false
  episode_len_mean: 257.36
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.573599999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1470
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0072244171983840005
          cur_lr: 5.000000000000001e-05
          entropy: 0.9427243159876929
          entropy_coeff: 0.009999999999999998
          kl: 0.009756990991275197
          policy_loss: -0.007911561263932122
          total_loss: -0.005886436502138773
          vf_explained_var: 0.3788664937019348
          vf_loss: 0.01138187704814805
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_steps_sampled: 448000
    num_steps_trained: 4480

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,448,10861.9,448000,-2.5736,-1.98,-3.13,257.36


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 449000
  custom_metrics: {}
  date: 2021-10-24_11-09-23
  done: false
  episode_len_mean: 256.39
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.563899999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1474
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0072244171983840005
          cur_lr: 5.000000000000001e-05
          entropy: 0.8878736264175839
          entropy_coeff: 0.009999999999999998
          kl: 0.012023402887734057
          policy_loss: -0.017523412075307634
          total_loss: -0.013051020436816746
          vf_explained_var: 0.19423805177211761
          vf_loss: 0.013264265884127881
    num_agent_steps_sampled: 449000
    num_agent_steps_trained: 449000
    num_steps_sampled: 449000
    num_steps_trained: 44

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,449,10889.9,449000,-2.5639,-1.98,-3.13,256.39


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 450000
  custom_metrics: {}
  date: 2021-10-24_11-09-50
  done: false
  episode_len_mean: 255.24
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.5523999999999893
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1478
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0072244171983840005
          cur_lr: 5.000000000000001e-05
          entropy: 0.6996509518888262
          entropy_coeff: 0.009999999999999998
          kl: 0.005851807947710529
          policy_loss: 0.01552795527709855
          total_loss: 0.018175602704286576
          vf_explained_var: 0.4560015797615051
          vf_loss: 0.009601882472634316
    num_agent_steps_sampled: 450000
    num_agent_steps_trained: 450000
    num_steps_sampled: 450000
    num_steps_trained: 45000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,450,10916.3,450000,-2.5524,-1.98,-3.13,255.24


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 451000
  custom_metrics: {}
  date: 2021-10-24_11-10-15
  done: false
  episode_len_mean: 256.1
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.560999999999989
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1482
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0072244171983840005
          cur_lr: 5.000000000000001e-05
          entropy: 0.9917182975345188
          entropy_coeff: 0.009999999999999998
          kl: 0.030373340718925394
          policy_loss: 0.016796064666575856
          total_loss: 0.01716876510116789
          vf_explained_var: 0.5460103154182434
          vf_loss: 0.010070454639693102
    num_agent_steps_sampled: 451000
    num_agent_steps_trained: 451000
    num_steps_sampled: 451000
    num_steps_trained: 451000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,451,10941.2,451000,-2.561,-1.98,-3.13,256.1


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 452000
  custom_metrics: {}
  date: 2021-10-24_11-10-41
  done: false
  episode_len_mean: 255.33
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.5532999999999895
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1486
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010836625797576008
          cur_lr: 5.000000000000001e-05
          entropy: 0.7065065655443403
          entropy_coeff: 0.009999999999999998
          kl: 0.009454060611101264
          policy_loss: -0.021768378673328294
          total_loss: -0.0189791036148866
          vf_explained_var: 0.48224231600761414
          vf_loss: 0.009751887598799334
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_steps_sampled: 452000
    num_steps_trained: 4520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,452,10967.7,452000,-2.5533,-1.98,-3.13,255.33


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 453000
  custom_metrics: {}
  date: 2021-10-24_11-11-08
  done: false
  episode_len_mean: 255.03
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.5502999999999894
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1490
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010836625797576008
          cur_lr: 5.000000000000001e-05
          entropy: 0.7044594082567427
          entropy_coeff: 0.009999999999999998
          kl: 0.008622107438429635
          policy_loss: 0.04337243148022228
          total_loss: 0.04713549431827333
          vf_explained_var: 0.22546230256557465
          vf_loss: 0.010714221311112245
    num_agent_steps_sampled: 453000
    num_agent_steps_trained: 453000
    num_steps_sampled: 453000
    num_steps_trained: 453000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,453,10995.1,453000,-2.5503,-1.98,-3.13,255.03


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 454000
  custom_metrics: {}
  date: 2021-10-24_11-11-36
  done: false
  episode_len_mean: 253.98
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.5397999999999894
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1494
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010836625797576008
          cur_lr: 5.000000000000001e-05
          entropy: 0.6899041950702667
          entropy_coeff: 0.009999999999999998
          kl: 0.007721575950615842
          policy_loss: 0.016423273583253226
          total_loss: 0.02104494170182281
          vf_explained_var: 0.23832456767559052
          vf_loss: 0.011437034596585564
    num_agent_steps_sampled: 454000
    num_agent_steps_trained: 454000
    num_steps_sampled: 454000
    num_steps_trained: 45400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,454,11022.7,454000,-2.5398,-1.98,-3.13,253.98


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 455000
  custom_metrics: {}
  date: 2021-10-24_11-12-04
  done: false
  episode_len_mean: 253.1
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.53099999999999
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1498
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.010836625797576008
          cur_lr: 5.000000000000001e-05
          entropy: 0.6809104813469781
          entropy_coeff: 0.009999999999999998
          kl: 0.02864227219690447
          policy_loss: 0.011089325116740333
          total_loss: 0.01465009285344018
          vf_explained_var: 0.3591906726360321
          vf_loss: 0.010059487322966259
    num_agent_steps_sampled: 455000
    num_agent_steps_trained: 455000
    num_steps_sampled: 455000
    num_steps_trained: 455000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,455,11050.5,455000,-2.531,-1.98,-3.13,253.1




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 456000
  custom_metrics: {}
  date: 2021-10-24_11-12-49
  done: false
  episode_len_mean: 252.35
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.52349999999999
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1502
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01625493869636401
          cur_lr: 5.000000000000001e-05
          entropy: 0.7283923765023549
          entropy_coeff: 0.009999999999999998
          kl: 0.028854469636895096
          policy_loss: -0.10930883404281404
          total_loss: -0.09824405825800366
          vf_explained_var: 0.12245792895555496
          vf_loss: 0.01787966900608606
    num_agent_steps_sampled: 456000
    num_agent_steps_trained: 456000
    num_steps_sampled: 456000
    num_steps_trained: 456000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,456,11095.8,456000,-2.5235,-1.98,-3.13,252.35


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 457000
  custom_metrics: {}
  date: 2021-10-24_11-13-15
  done: false
  episode_len_mean: 251.33
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.5132999999999903
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1506
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024382408044546007
          cur_lr: 5.000000000000001e-05
          entropy: 0.8278193493684133
          entropy_coeff: 0.009999999999999998
          kl: 0.016107976402323566
          policy_loss: -0.1042579495244556
          total_loss: -0.09274282885922326
          vf_explained_var: 0.1223599761724472
          vf_loss: 0.019400561114566194
    num_agent_steps_sampled: 457000
    num_agent_steps_trained: 457000
    num_steps_sampled: 457000
    num_steps_trained: 457000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,457,11122,457000,-2.5133,-1.98,-3.13,251.33


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 458000
  custom_metrics: {}
  date: 2021-10-24_11-13-43
  done: false
  episode_len_mean: 249.73
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4972999999999907
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 5
  episodes_total: 1511
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.024382408044546007
          cur_lr: 5.000000000000001e-05
          entropy: 0.8234800193044874
          entropy_coeff: 0.009999999999999998
          kl: 0.06785499922843227
          policy_loss: 0.011246869423323208
          total_loss: 0.01795299156672425
          vf_explained_var: 0.36862555146217346
          vf_loss: 0.013286453816625807
    num_agent_steps_sampled: 458000
    num_agent_steps_trained: 458000
    num_steps_sampled: 458000
    num_steps_trained: 458000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,458,11149.6,458000,-2.4973,-1.98,-3.13,249.73


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 459000
  custom_metrics: {}
  date: 2021-10-24_11-14-11
  done: false
  episode_len_mean: 249.25
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.492499999999991
  episode_reward_min: -3.1299999999999772
  episodes_this_iter: 4
  episodes_total: 1515
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.03657361206681903
          cur_lr: 5.000000000000001e-05
          entropy: 0.4947149912516276
          entropy_coeff: 0.009999999999999998
          kl: 0.0035615169069864627
          policy_loss: 0.056418691906664105
          total_loss: 0.06017272281977865
          vf_explained_var: 0.506807804107666
          vf_loss: 0.00857092361483309
    num_agent_steps_sampled: 459000
    num_agent_steps_trained: 459000
    num_steps_sampled: 459000
    num_steps_trained: 459000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,459,11177,459000,-2.4925,-1.98,-3.13,249.25


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 460000
  custom_metrics: {}
  date: 2021-10-24_11-14-38
  done: false
  episode_len_mean: 248.06
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.480599999999991
  episode_reward_min: -2.9699999999999807
  episodes_this_iter: 4
  episodes_total: 1519
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.018286806033409514
          cur_lr: 5.000000000000001e-05
          entropy: 0.5356109645631578
          entropy_coeff: 0.009999999999999998
          kl: 0.012972228078346701
          policy_loss: 0.03409268980224927
          total_loss: 0.03810761736498939
          vf_explained_var: 0.3351089656352997
          vf_loss: 0.009133816816999266
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_steps_sampled: 460000
    num_steps_trained: 460000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,460,11204.7,460000,-2.4806,-1.98,-2.97,248.06


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 461000
  custom_metrics: {}
  date: 2021-10-24_11-15-06
  done: false
  episode_len_mean: 247.54
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.475399999999991
  episode_reward_min: -2.9699999999999807
  episodes_this_iter: 4
  episodes_total: 1523
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.018286806033409514
          cur_lr: 5.000000000000001e-05
          entropy: 0.4314116640223397
          entropy_coeff: 0.009999999999999998
          kl: 0.00348984439592876
          policy_loss: -0.044532242996825115
          total_loss: -0.035915689046184224
          vf_explained_var: 0.1518448144197464
          vf_loss: 0.012866853829473257
    num_agent_steps_sampled: 461000
    num_agent_steps_trained: 461000
    num_steps_sampled: 461000
    num_steps_trained: 46100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,461,11232.5,461000,-2.4754,-1.98,-2.97,247.54


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 462000
  custom_metrics: {}
  date: 2021-10-24_11-15-33
  done: false
  episode_len_mean: 246.49
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.464899999999991
  episode_reward_min: -2.9699999999999807
  episodes_this_iter: 5
  episodes_total: 1528
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.009143403016704757
          cur_lr: 5.000000000000001e-05
          entropy: 0.5440368771553039
          entropy_coeff: 0.009999999999999998
          kl: 0.004496205140947356
          policy_loss: -0.011662734382682376
          total_loss: -0.0013505728708373176
          vf_explained_var: 0.19741782546043396
          vf_loss: 0.015711421395341554
    num_agent_steps_sampled: 462000
    num_agent_steps_trained: 462000
    num_steps_sampled: 462000
    num_steps_trained: 46

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,462,11259.6,462000,-2.4649,-1.98,-2.97,246.49




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 463000
  custom_metrics: {}
  date: 2021-10-24_11-16-19
  done: false
  episode_len_mean: 245.86
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4585999999999917
  episode_reward_min: -2.9699999999999807
  episodes_this_iter: 4
  episodes_total: 1532
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0045717015083523785
          cur_lr: 5.000000000000001e-05
          entropy: 0.511597767803404
          entropy_coeff: 0.009999999999999998
          kl: 0.008328980363798956
          policy_loss: 0.026598722570472293
          total_loss: 0.03535189314021005
          vf_explained_var: 0.1057569608092308
          vf_loss: 0.01383106837876969
    num_agent_steps_sampled: 463000
    num_agent_steps_trained: 463000
    num_steps_sampled: 463000
    num_steps_trained: 463000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,463,11305.1,463000,-2.4586,-1.98,-2.97,245.86


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 464000
  custom_metrics: {}
  date: 2021-10-24_11-16-47
  done: false
  episode_len_mean: 245.36
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.453599999999992
  episode_reward_min: -2.9699999999999807
  episodes_this_iter: 4
  episodes_total: 1536
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0045717015083523785
          cur_lr: 5.000000000000001e-05
          entropy: 0.5037715206543605
          entropy_coeff: 0.009999999999999998
          kl: 0.004508917467950211
          policy_loss: -0.011919149094157748
          total_loss: -0.004350397570265664
          vf_explained_var: 0.16708742082118988
          vf_loss: 0.012585854375114043
    num_agent_steps_sampled: 464000
    num_agent_steps_trained: 464000
    num_steps_sampled: 464000
    num_steps_trained: 46

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,464,11333.1,464000,-2.4536,-1.98,-2.97,245.36


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 465000
  custom_metrics: {}
  date: 2021-10-24_11-17-14
  done: false
  episode_len_mean: 245.03
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4502999999999915
  episode_reward_min: -2.9699999999999807
  episodes_this_iter: 4
  episodes_total: 1540
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0022858507541761892
          cur_lr: 5.000000000000001e-05
          entropy: 0.4998779508802626
          entropy_coeff: 0.009999999999999998
          kl: 0.0062916860894918925
          policy_loss: -0.12058846867746777
          total_loss: -0.1101001624431875
          vf_explained_var: 0.21542085707187653
          vf_loss: 0.015472705558770233
    num_agent_steps_sampled: 465000
    num_agent_steps_trained: 465000
    num_steps_sampled: 465000
    num_steps_trained: 465

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,465,11359.9,465000,-2.4503,-1.98,-2.97,245.03


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 466000
  custom_metrics: {}
  date: 2021-10-24_11-17-40
  done: false
  episode_len_mean: 244.69
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4468999999999914
  episode_reward_min: -2.9699999999999807
  episodes_this_iter: 5
  episodes_total: 1545
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0022858507541761892
          cur_lr: 5.000000000000001e-05
          entropy: 0.6106579787201352
          entropy_coeff: 0.009999999999999998
          kl: 0.0050211155722431995
          policy_loss: -0.00814091128607591
          total_loss: -0.0013729749040471183
          vf_explained_var: 0.2904452979564667
          vf_loss: 0.012863039722045262
    num_agent_steps_sampled: 466000
    num_agent_steps_trained: 466000
    num_steps_sampled: 466000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,466,11386.6,466000,-2.4469,-1.98,-2.97,244.69


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 467000
  custom_metrics: {}
  date: 2021-10-24_11-18-06
  done: false
  episode_len_mean: 244.68
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4467999999999916
  episode_reward_min: -2.9699999999999807
  episodes_this_iter: 4
  episodes_total: 1549
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0022858507541761892
          cur_lr: 5.000000000000001e-05
          entropy: 0.6071074515581131
          entropy_coeff: 0.009999999999999998
          kl: 0.011544455431649206
          policy_loss: 0.029212177875969143
          total_loss: 0.033688525441620085
          vf_explained_var: 0.20244884490966797
          vf_loss: 0.010521033152730928
    num_agent_steps_sampled: 467000
    num_agent_steps_trained: 467000
    num_steps_sampled: 467000
    num_steps_trained: 467

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,467,11412.6,467000,-2.4468,-1.98,-2.97,244.68


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 468000
  custom_metrics: {}
  date: 2021-10-24_11-18-32
  done: false
  episode_len_mean: 244.89
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.448899999999991
  episode_reward_min: -2.9699999999999807
  episodes_this_iter: 4
  episodes_total: 1553
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0022858507541761892
          cur_lr: 5.000000000000001e-05
          entropy: 0.4313333921962314
          entropy_coeff: 0.009999999999999998
          kl: 0.029675304535453507
          policy_loss: 0.03695117558042208
          total_loss: 0.045251416663328804
          vf_explained_var: 0.10378678888082504
          vf_loss: 0.012545740832057265
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_steps_sampled: 468000
    num_steps_trained: 46800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,468,11438.6,468000,-2.4489,-1.98,-2.97,244.89


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 469000
  custom_metrics: {}
  date: 2021-10-24_11-19-00
  done: false
  episode_len_mean: 243.54
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4353999999999916
  episode_reward_min: -2.9699999999999807
  episodes_this_iter: 4
  episodes_total: 1557
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0034287761312642834
          cur_lr: 5.000000000000001e-05
          entropy: 0.5336385677258174
          entropy_coeff: 0.009999999999999998
          kl: 0.007603027412489875
          policy_loss: 0.04248811420467165
          total_loss: 0.04898253215683831
          vf_explained_var: 0.161097452044487
          vf_loss: 0.011804735174195634
    num_agent_steps_sampled: 469000
    num_agent_steps_trained: 469000
    num_steps_sampled: 469000
    num_steps_trained: 469000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,469,11465.8,469000,-2.4354,-1.98,-2.97,243.54




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 470000
  custom_metrics: {}
  date: 2021-10-24_11-19-42
  done: false
  episode_len_mean: 243.75
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4374999999999925
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 1561
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0034287761312642834
          cur_lr: 5.000000000000001e-05
          entropy: 0.7539206246534983
          entropy_coeff: 0.009999999999999998
          kl: 0.015981233656581394
          policy_loss: 0.023796176165342332
          total_loss: 0.029311712582906088
          vf_explained_var: 0.13857999444007874
          vf_loss: 0.012999943395455679
    num_agent_steps_sampled: 470000
    num_agent_steps_trained: 470000
    num_steps_sampled: 470000
    num_steps_trained: 470

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,470,11508.3,470000,-2.4375,-1.98,-3.59,243.75


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 471000
  custom_metrics: {}
  date: 2021-10-24_11-20-07
  done: false
  episode_len_mean: 243.59
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4358999999999913
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 3
  episodes_total: 1564
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0034287761312642834
          cur_lr: 5.000000000000001e-05
          entropy: 0.6404309302568436
          entropy_coeff: 0.009999999999999998
          kl: 0.013497200776084127
          policy_loss: -0.006227126883135902
          total_loss: -0.002898943962322341
          vf_explained_var: 0.21916639804840088
          vf_loss: 0.009686213561023276
    num_agent_steps_sampled: 471000
    num_agent_steps_trained: 471000
    num_steps_sampled: 471000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,471,11533.4,471000,-2.4359,-1.98,-3.59,243.59


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 472000
  custom_metrics: {}
  date: 2021-10-24_11-20-34
  done: false
  episode_len_mean: 242.81
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4280999999999917
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 1568
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0034287761312642834
          cur_lr: 5.000000000000001e-05
          entropy: 0.49535256557994417
          entropy_coeff: 0.009999999999999998
          kl: 0.0069641625690025554
          policy_loss: -0.1121337184475528
          total_loss: -0.10159673591454824
          vf_explained_var: 0.11578579246997833
          vf_loss: 0.015466629103240039
    num_agent_steps_sampled: 472000
    num_agent_steps_trained: 472000
    num_steps_sampled: 472000
    num_steps_trained: 47

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,472,11560.3,472000,-2.4281,-1.98,-3.59,242.81


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 473000
  custom_metrics: {}
  date: 2021-10-24_11-21-01
  done: false
  episode_len_mean: 242.52
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.4251999999999923
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 5
  episodes_total: 1573
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0034287761312642834
          cur_lr: 5.000000000000001e-05
          entropy: 0.4053583976295259
          entropy_coeff: 0.009999999999999998
          kl: 0.00582465417397739
          policy_loss: 0.005617159729202588
          total_loss: 0.01572328367167049
          vf_explained_var: 0.17749932408332825
          vf_loss: 0.014139739403294192
    num_agent_steps_sampled: 473000
    num_agent_steps_trained: 473000
    num_steps_sampled: 473000
    num_steps_trained: 47300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,473,11587.4,473000,-2.4252,-2.03,-3.59,242.52


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 474000
  custom_metrics: {}
  date: 2021-10-24_11-21-29
  done: false
  episode_len_mean: 241.98
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.419799999999992
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 1577
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0034287761312642834
          cur_lr: 5.000000000000001e-05
          entropy: 0.35360413591066997
          entropy_coeff: 0.009999999999999998
          kl: 0.003433067286140733
          policy_loss: 0.0347694123784701
          total_loss: 0.04290841387377845
          vf_explained_var: 0.16618283092975616
          vf_loss: 0.011663274891260598
    num_agent_steps_sampled: 474000
    num_agent_steps_trained: 474000
    num_steps_sampled: 474000
    num_steps_trained: 474000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,474,11614.9,474000,-2.4198,-2.03,-3.59,241.98


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 475000
  custom_metrics: {}
  date: 2021-10-24_11-21-56
  done: false
  episode_len_mean: 241.14
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.4113999999999924
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 1581
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0017143880656321417
          cur_lr: 5.000000000000001e-05
          entropy: 0.37613723542955185
          entropy_coeff: 0.009999999999999998
          kl: 0.001997949553281507
          policy_loss: -0.014846131039990319
          total_loss: -0.00582318181792895
          vf_explained_var: 0.09102614969015121
          vf_loss: 0.012780897298620806
    num_agent_steps_sampled: 475000
    num_agent_steps_trained: 475000
    num_steps_sampled: 475000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,475,11642.5,475000,-2.4114,-2.03,-3.59,241.14


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 476000
  custom_metrics: {}
  date: 2021-10-24_11-22-23
  done: false
  episode_len_mean: 240.72
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.4071999999999925
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 4
  episodes_total: 1585
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0008571940328160709
          cur_lr: 5.000000000000001e-05
          entropy: 0.4811196943124135
          entropy_coeff: 0.009999999999999998
          kl: 0.0038778427562814145
          policy_loss: -0.06339124275578392
          total_loss: -0.05626857289009624
          vf_explained_var: 0.1916920393705368
          vf_loss: 0.011930544612308342
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_steps_sampled: 476000
    num_steps_trained: 476

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,476,11669,476000,-2.4072,-2.03,-3.59,240.72




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 477000
  custom_metrics: {}
  date: 2021-10-24_11-23-07
  done: false
  episode_len_mean: 240.66
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.406599999999993
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 5
  episodes_total: 1590
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00042859701640803543
          cur_lr: 5.000000000000001e-05
          entropy: 0.4278231352567673
          entropy_coeff: 0.009999999999999998
          kl: 0.005238458748608268
          policy_loss: -0.029760701374875173
          total_loss: -0.018157261030541525
          vf_explained_var: 0.17452532052993774
          vf_loss: 0.015879427124228743
    num_agent_steps_sampled: 477000
    num_agent_steps_trained: 477000
    num_steps_sampled: 477000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,477,11712.7,477000,-2.4066,-2.03,-3.59,240.66


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 478000
  custom_metrics: {}
  date: 2021-10-24_11-23-26
  done: false
  episode_len_mean: 244.27
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.4426999999999923
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 2
  episodes_total: 1592
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00042859701640803543
          cur_lr: 5.000000000000001e-05
          entropy: 1.167068295346366
          entropy_coeff: 0.009999999999999998
          kl: 0.028468615493654505
          policy_loss: 0.05486786746316486
          total_loss: 0.048718947834438746
          vf_explained_var: 0.2801617383956909
          vf_loss: 0.0055095552085226195
    num_agent_steps_sampled: 478000
    num_agent_steps_trained: 478000
    num_steps_sampled: 478000
    num_steps_trained: 47800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,478,11732.1,478000,-2.4427,-2.03,-4.39,244.27


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 479000
  custom_metrics: {}
  date: 2021-10-24_11-23-49
  done: false
  episode_len_mean: 245.86
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.4585999999999917
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1596
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.000642895524612053
          cur_lr: 5.000000000000001e-05
          entropy: 0.7090266399913364
          entropy_coeff: 0.009999999999999998
          kl: 0.007998660106419524
          policy_loss: 0.018046481410662334
          total_loss: 0.022887261791361704
          vf_explained_var: 0.214222252368927
          vf_loss: 0.01192590349043409
    num_agent_steps_sampled: 479000
    num_agent_steps_trained: 479000
    num_steps_sampled: 479000
    num_steps_trained: 479000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,479,11755.3,479000,-2.4586,-2.03,-4.39,245.86


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 480000
  custom_metrics: {}
  date: 2021-10-24_11-24-14
  done: false
  episode_len_mean: 246.91
  episode_media: {}
  episode_reward_max: -2.0300000000000007
  episode_reward_mean: -2.4690999999999916
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 3
  episodes_total: 1599
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.000642895524612053
          cur_lr: 5.000000000000001e-05
          entropy: 0.625228034125434
          entropy_coeff: 0.009999999999999998
          kl: 0.006116358015042482
          policy_loss: -0.08631967935297225
          total_loss: -0.08222464389271206
          vf_explained_var: 0.18065688014030457
          vf_loss: 0.010343383330230911
    num_agent_steps_sampled: 480000
    num_agent_steps_trained: 480000
    num_steps_sampled: 480000
    num_steps_trained: 480000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,480,11780.5,480000,-2.4691,-2.03,-4.39,246.91


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 481000
  custom_metrics: {}
  date: 2021-10-24_11-24-39
  done: false
  episode_len_mean: 247.53
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4752999999999914
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1603
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.000642895524612053
          cur_lr: 5.000000000000001e-05
          entropy: 0.5379651602771547
          entropy_coeff: 0.009999999999999998
          kl: 0.0020797805768907945
          policy_loss: -0.09064122579163975
          total_loss: -0.08180041681561205
          vf_explained_var: 0.10819332301616669
          vf_loss: 0.014219125867303875
    num_agent_steps_sampled: 481000
    num_agent_steps_trained: 481000
    num_steps_sampled: 481000
    num_steps_trained: 4810

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,481,11805.4,481000,-2.4753,-2.05,-4.39,247.53


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 482000
  custom_metrics: {}
  date: 2021-10-24_11-25-06
  done: false
  episode_len_mean: 247.82
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4781999999999913
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1607
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003214477623060265
          cur_lr: 5.000000000000001e-05
          entropy: 0.5531416555245717
          entropy_coeff: 0.009999999999999998
          kl: 0.0043515479048641245
          policy_loss: -0.025485532813602024
          total_loss: -0.018066103259722393
          vf_explained_var: 0.09993097186088562
          vf_loss: 0.012949447106156085
    num_agent_steps_sampled: 482000
    num_agent_steps_trained: 482000
    num_steps_sampled: 482000
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,482,11831.6,482000,-2.4782,-2.05,-4.39,247.82


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 483000
  custom_metrics: {}
  date: 2021-10-24_11-25-31
  done: false
  episode_len_mean: 248.69
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.486899999999991
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1611
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00016072388115301325
          cur_lr: 5.000000000000001e-05
          entropy: 0.5301475054687924
          entropy_coeff: 0.009999999999999998
          kl: 0.03141088604557933
          policy_loss: -0.03818185677131017
          total_loss: -0.031019145250320436
          vf_explained_var: 0.18600669503211975
          vf_loss: 0.012459138304822974
    num_agent_steps_sampled: 483000
    num_agent_steps_trained: 483000
    num_steps_sampled: 483000
    num_steps_trained: 4830

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,483,11856.9,483000,-2.4869,-2.05,-4.39,248.69


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 484000
  custom_metrics: {}
  date: 2021-10-24_11-25-59
  done: false
  episode_len_mean: 248.66
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.486599999999991
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 5
  episodes_total: 1616
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00024108582172951992
          cur_lr: 5.000000000000001e-05
          entropy: 0.270597102244695
          entropy_coeff: 0.009999999999999998
          kl: 0.005049798542290852
          policy_loss: -0.028436937265925936
          total_loss: -0.017146163351005977
          vf_explained_var: 0.22691872715950012
          vf_loss: 0.013995527651988797
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_steps_sampled: 484000
    num_steps_trained: 484

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,484,11884.6,484000,-2.4866,-2.05,-4.39,248.66




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 485000
  custom_metrics: {}
  date: 2021-10-24_11-26-43
  done: false
  episode_len_mean: 248.22
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4821999999999904
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1620
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00024108582172951992
          cur_lr: 5.000000000000001e-05
          entropy: 0.5276336683167352
          entropy_coeff: 0.009999999999999998
          kl: 0.06659235464985581
          policy_loss: 0.014644413855340746
          total_loss: 0.019840757507416935
          vf_explained_var: 0.23416535556316376
          vf_loss: 0.010456625962009032
    num_agent_steps_sampled: 485000
    num_agent_steps_trained: 485000
    num_steps_sampled: 485000
    num_steps_trained: 4850

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,485,11928.6,485000,-2.4822,-2.05,-4.39,248.22


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 486000
  custom_metrics: {}
  date: 2021-10-24_11-27-09
  done: false
  episode_len_mean: 249.82
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4981999999999904
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1624
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003616287325942798
          cur_lr: 5.000000000000001e-05
          entropy: 0.8226053529315525
          entropy_coeff: 0.009999999999999998
          kl: 0.012039683160204914
          policy_loss: -0.010865830298927095
          total_loss: -0.005337354458040662
          vf_explained_var: 0.0760117918252945
          vf_loss: 0.01375017466230525
    num_agent_steps_sampled: 486000
    num_agent_steps_trained: 486000
    num_steps_sampled: 486000
    num_steps_trained: 4860

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,486,11954.6,486000,-2.4982,-2.05,-4.39,249.82


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 487000
  custom_metrics: {}
  date: 2021-10-24_11-27-33
  done: false
  episode_len_mean: 250.9
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.50899999999999
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 3
  episodes_total: 1627
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003616287325942798
          cur_lr: 5.000000000000001e-05
          entropy: 0.9059902191162109
          entropy_coeff: 0.009999999999999998
          kl: 0.013896343767451608
          policy_loss: -0.00018787475095854864
          total_loss: 0.000705305321349038
          vf_explained_var: 0.04639450088143349
          vf_loss: 0.009948060696478933
    num_agent_steps_sampled: 487000
    num_agent_steps_trained: 487000
    num_steps_sampled: 487000
    num_steps_trained: 4870

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,487,11978.6,487000,-2.509,-2.05,-4.39,250.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 488000
  custom_metrics: {}
  date: 2021-10-24_11-27-57
  done: false
  episode_len_mean: 252.81
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5280999999999896
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1631
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003616287325942798
          cur_lr: 5.000000000000001e-05
          entropy: 0.743389939599567
          entropy_coeff: 0.009999999999999998
          kl: 0.01737351756193806
          policy_loss: -0.00615583856900533
          total_loss: -7.198916541205512e-06
          vf_explained_var: 0.10820574313402176
          vf_loss: 0.013576255604210828
    num_agent_steps_sampled: 488000
    num_agent_steps_trained: 488000
    num_steps_sampled: 488000
    num_steps_trained: 488000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,488,12002.9,488000,-2.5281,-2.07,-4.39,252.81


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 489000
  custom_metrics: {}
  date: 2021-10-24_11-28-25
  done: false
  episode_len_mean: 252.85
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5284999999999895
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1635
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0003616287325942798
          cur_lr: 5.000000000000001e-05
          entropy: 0.3274990412924025
          entropy_coeff: 0.009999999999999998
          kl: 0.0038824450331764937
          policy_loss: 0.02538892858558231
          total_loss: 0.03333424015177621
          vf_explained_var: 0.17112454771995544
          vf_loss: 0.011218897346407176
    num_agent_steps_sampled: 489000
    num_agent_steps_trained: 489000
    num_steps_sampled: 489000
    num_steps_trained: 489000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,489,12030.9,489000,-2.5285,-2.07,-4.39,252.85


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 490000
  custom_metrics: {}
  date: 2021-10-24_11-28-53
  done: false
  episode_len_mean: 252.9
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.5289999999999897
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1639
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001808143662971399
          cur_lr: 5.000000000000001e-05
          entropy: 0.3640635096364551
          entropy_coeff: 0.009999999999999998
          kl: 0.0015576908569343967
          policy_loss: -0.009696588416894277
          total_loss: -0.0015429151554902396
          vf_explained_var: 0.18658307194709778
          vf_loss: 0.011794029134843084
    num_agent_steps_sampled: 490000
    num_agent_steps_trained: 490000
    num_steps_sampled: 490000
    num_steps_trained: 490000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,490,12058.5,490000,-2.529,-2.07,-4.39,252.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 491000
  custom_metrics: {}
  date: 2021-10-24_11-29-20
  done: false
  episode_len_mean: 252.53
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.52529999999999
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 5
  episodes_total: 1644
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.040718314856995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3862751437558068
          entropy_coeff: 0.009999999999999998
          kl: 0.0043995018731814006
          policy_loss: -0.02577675994899538
          total_loss: -0.014154808057679071
          vf_explained_var: 0.16607670485973358
          vf_loss: 0.015484304415682952
    num_agent_steps_sampled: 491000
    num_agent_steps_trained: 491000
    num_steps_sampled: 491000
    num_steps_trained: 491000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,491,12085.6,491000,-2.5253,-2.07,-4.39,252.53


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 492000
  custom_metrics: {}
  date: 2021-10-24_11-29-47
  done: false
  episode_len_mean: 252.06
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.52059999999999
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1648
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5203591574284975e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3695612142483393
          entropy_coeff: 0.009999999999999998
          kl: 0.0018421178422645963
          policy_loss: 0.03066441458132532
          total_loss: 0.03850139213932885
          vf_explained_var: 0.12509416043758392
          vf_loss: 0.011532508105867438
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_steps_sampled: 492000
    num_steps_trained: 492000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,492,12113.1,492000,-2.5206,-2.07,-4.39,252.06




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 493000
  custom_metrics: {}
  date: 2021-10-24_11-30-31
  done: false
  episode_len_mean: 252.07
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.52069999999999
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1652
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2601795787142487e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.513493122988277
          entropy_coeff: 0.009999999999999998
          kl: 0.0025179942921127463
          policy_loss: 0.03321757217248281
          total_loss: 0.04150960784819391
          vf_explained_var: 0.1631402224302292
          vf_loss: 0.01342691309336159
    num_agent_steps_sampled: 493000
    num_agent_steps_trained: 493000
    num_steps_sampled: 493000
    num_steps_trained: 493000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,493,12157.3,493000,-2.5207,-2.07,-4.39,252.07


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 494000
  custom_metrics: {}
  date: 2021-10-24_11-30-56
  done: false
  episode_len_mean: 252.69
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.52689999999999
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1656
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1300897893571244e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7238335059748755
          entropy_coeff: 0.009999999999999998
          kl: 0.0041476560978174525
          policy_loss: 0.02003611135813925
          total_loss: 0.028577569872140884
          vf_explained_var: 0.02938631922006607
          vf_loss: 0.015779750007722113
    num_agent_steps_sampled: 494000
    num_agent_steps_trained: 494000
    num_steps_sampled: 494000
    num_steps_trained: 494000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,494,12182,494000,-2.5269,-2.07,-4.39,252.69


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 495000
  custom_metrics: {}
  date: 2021-10-24_11-31-22
  done: false
  episode_len_mean: 253.27
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.53269999999999
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 3
  episodes_total: 1659
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.650448946785622e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.63135128153695
          entropy_coeff: 0.009999999999999998
          kl: 0.04510686756144982
          policy_loss: -0.108363186650806
          total_loss: -0.09846898333893882
          vf_explained_var: 0.03574206680059433
          vf_loss: 0.016207461452318564
    num_agent_steps_sampled: 495000
    num_agent_steps_trained: 495000
    num_steps_sampled: 495000
    num_steps_trained: 495000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,495,12207.5,495000,-2.5327,-2.07,-4.39,253.27


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 496000
  custom_metrics: {}
  date: 2021-10-24_11-31-46
  done: false
  episode_len_mean: 252.78
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.52779999999999
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1663
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.475673420178432e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7836781515015496
          entropy_coeff: 0.009999999999999998
          kl: 0.021704054228527208
          policy_loss: -0.041098588539494406
          total_loss: -0.037282721449931465
          vf_explained_var: 0.11610429733991623
          vf_loss: 0.011652462468999956
    num_agent_steps_sampled: 496000
    num_agent_steps_trained: 496000
    num_steps_sampled: 496000
    num_steps_trained: 496000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,496,12232.1,496000,-2.5278,-2.07,-4.39,252.78


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 497000
  custom_metrics: {}
  date: 2021-10-24_11-32-08
  done: false
  episode_len_mean: 253.98
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.53979999999999
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 3
  episodes_total: 1666
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2713510130267645e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7912097563346226
          entropy_coeff: 0.009999999999999998
          kl: 0.011756736065396348
          policy_loss: -0.11565790623426438
          total_loss: -0.10755742175711526
          vf_explained_var: 0.16326633095741272
          vf_loss: 0.01601243561340703
    num_agent_steps_sampled: 497000
    num_agent_steps_trained: 497000
    num_steps_sampled: 497000
    num_steps_trained: 497000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,497,12253.6,497000,-2.5398,-2.07,-4.39,253.98


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 498000
  custom_metrics: {}
  date: 2021-10-24_11-32-32
  done: false
  episode_len_mean: 256.0
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.559999999999989
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1670
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2713510130267645e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6667993525664012
          entropy_coeff: 0.009999999999999998
          kl: 0.005383929788650911
          policy_loss: 0.05317534117235078
          total_loss: 0.05725832308332125
          vf_explained_var: 0.2415134608745575
          vf_loss: 0.010750909335911274
    num_agent_steps_sampled: 498000
    num_agent_steps_trained: 498000
    num_steps_sampled: 498000
    num_steps_trained: 498000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,498,12277.3,498000,-2.56,-2.07,-4.39,256


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 499000
  custom_metrics: {}
  date: 2021-10-24_11-32-55
  done: false
  episode_len_mean: 257.47
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.574699999999989
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1674
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2713510130267645e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7592899726496802
          entropy_coeff: 0.009999999999999998
          kl: 0.012445688505486174
          policy_loss: 0.03978385221627023
          total_loss: 0.043647872739368015
          vf_explained_var: 0.06408434361219406
          vf_loss: 0.01145676261641913
    num_agent_steps_sampled: 499000
    num_agent_steps_trained: 499000
    num_steps_sampled: 499000
    num_steps_trained: 499000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,499,12301.1,499000,-2.5747,-2.07,-4.39,257.47


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 500000
  custom_metrics: {}
  date: 2021-10-24_11-33-15
  done: false
  episode_len_mean: 260.59
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.6058999999999886
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 3
  episodes_total: 1677
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2713510130267645e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.8684615174929301
          entropy_coeff: 0.009999999999999998
          kl: 0.00806296405758127
          policy_loss: 0.05993977040052414
          total_loss: 0.06168365544743008
          vf_explained_var: -0.18221229314804077
          vf_loss: 0.010428400063473318
    num_agent_steps_sampled: 500000
    num_agent_steps_trained: 500000
    num_steps_sampled: 500000
    num_steps_trained: 500000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,500,12320.7,500000,-2.6059,-2.07,-4.39,260.59




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 501000
  custom_metrics: {}
  date: 2021-10-24_11-33-51
  done: false
  episode_len_mean: 263.86
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.638599999999988
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 3
  episodes_total: 1680
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2713510130267645e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9804502911037869
          entropy_coeff: 0.009999999999999998
          kl: 0.013850341936119845
          policy_loss: 0.039657434903913075
          total_loss: 0.04150902670290735
          vf_explained_var: 0.11783306300640106
          vf_loss: 0.011655918243599848
    num_agent_steps_sampled: 501000
    num_agent_steps_trained: 501000
    num_steps_sampled: 501000
    num_steps_trained: 501000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,501,12356.7,501000,-2.6386,-2.07,-4.39,263.86


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 502000
  custom_metrics: {}
  date: 2021-10-24_11-34-15
  done: false
  episode_len_mean: 266.03
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.6602999999999866
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 3
  episodes_total: 1683
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2713510130267645e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.8962983131408692
          entropy_coeff: 0.009999999999999998
          kl: 0.01163709409116866
          policy_loss: 0.05483721693356832
          total_loss: 0.05654948403437932
          vf_explained_var: -0.11528098583221436
          vf_loss: 0.010675102066145175
    num_agent_steps_sampled: 502000
    num_agent_steps_trained: 502000
    num_steps_sampled: 502000
    num_steps_trained: 502000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,502,12380.6,502000,-2.6603,-2.07,-4.39,266.03


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 503000
  custom_metrics: {}
  date: 2021-10-24_11-34-40
  done: false
  episode_len_mean: 266.4
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.663999999999987
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1687
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2713510130267645e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6228230552540885
          entropy_coeff: 0.009999999999999998
          kl: 0.01591613856150889
          policy_loss: 0.035038256976339555
          total_loss: 0.04235021074612935
          vf_explained_var: 0.20821140706539154
          vf_loss: 0.013539986291693316
    num_agent_steps_sampled: 503000
    num_agent_steps_trained: 503000
    num_steps_sampled: 503000
    num_steps_trained: 503000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,503,12405.6,503000,-2.664,-2.07,-4.39,266.4


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 504000
  custom_metrics: {}
  date: 2021-10-24_11-35-06
  done: false
  episode_len_mean: 266.09
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.660899999999988
  episode_reward_min: -4.389999999999951
  episodes_this_iter: 4
  episodes_total: 1691
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2713510130267645e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7206743856271108
          entropy_coeff: 0.009999999999999998
          kl: 0.008989674922938364
          policy_loss: 0.0026183542278077868
          total_loss: 0.007646736461255285
          vf_explained_var: 0.38230377435684204
          vf_loss: 0.012235014357914527
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_steps_sampled: 504000
    num_steps_trained: 504000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,504,12431.4,504000,-2.6609,-2.07,-4.39,266.09


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 505000
  custom_metrics: {}
  date: 2021-10-24_11-35-20
  done: false
  episode_len_mean: 268.9
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.688999999999987
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 1
  episodes_total: 1692
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2713510130267645e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.7575121137830947
          entropy_coeff: 0.009999999999999998
          kl: 0.028482704303800076
          policy_loss: -0.023727636535962424
          total_loss: -0.03575711051623027
          vf_explained_var: 0.2982446253299713
          vf_loss: 0.005545286584593769
    num_agent_steps_sampled: 505000
    num_agent_steps_trained: 505000
    num_steps_sampled: 505000
    num_steps_trained: 505000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,505,12445.7,505000,-2.689,-2.07,-7.2,268.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 506000
  custom_metrics: {}
  date: 2021-10-24_11-35-35
  done: false
  episode_len_mean: 276.07
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.7606999999999853
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 2
  episodes_total: 1694
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9070265195401472e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.7476452231407165
          entropy_coeff: 0.009999999999999998
          kl: 0.014709408031757753
          policy_loss: -0.004378050400151147
          total_loss: -0.018520673281616634
          vf_explained_var: 0.24476563930511475
          vf_loss: 0.0033335462354847955
    num_agent_steps_sampled: 506000
    num_agent_steps_trained: 506000
    num_steps_sampled: 506000
    num_steps_trained: 506000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,506,12460.7,506000,-2.7607,-2.07,-7.2,276.07


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 507000
  custom_metrics: {}
  date: 2021-10-24_11-35-54
  done: false
  episode_len_mean: 279.26
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.7925999999999847
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 2
  episodes_total: 1696
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9070265195401472e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.4848910186025832
          entropy_coeff: 0.009999999999999998
          kl: 0.03472979668779771
          policy_loss: 0.08933109490511318
          total_loss: 0.07917247445633015
          vf_explained_var: 0.6118830442428589
          vf_loss: 0.004689627270434155
    num_agent_steps_sampled: 507000
    num_agent_steps_trained: 507000
    num_steps_sampled: 507000
    num_steps_trained: 507000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,507,12480,507000,-2.7926,-2.07,-7.2,279.26


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 508000
  custom_metrics: {}
  date: 2021-10-24_11-36-19
  done: false
  episode_len_mean: 279.66
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.796599999999984
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1700
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8605397793102208e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.7381087760130565
          entropy_coeff: 0.009999999999999998
          kl: 0.008359726452467244
          policy_loss: -0.005030946764681074
          total_loss: -0.0018191477490795984
          vf_explained_var: 0.6327416896820068
          vf_loss: 0.010592653043568134
    num_agent_steps_sampled: 508000
    num_agent_steps_trained: 508000
    num_steps_sampled: 508000
    num_steps_trained: 508000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,508,12504.7,508000,-2.7966,-2.07,-7.2,279.66


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 509000
  custom_metrics: {}
  date: 2021-10-24_11-36-45
  done: false
  episode_len_mean: 279.91
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.7990999999999837
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 3
  episodes_total: 1703
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8605397793102208e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5837133387724559
          entropy_coeff: 0.009999999999999998
          kl: 0.004134289199592602
          policy_loss: -0.10773398586445385
          total_loss: -0.10500213015410635
          vf_explained_var: 0.6796685457229614
          vf_loss: 0.008568870421085093
    num_agent_steps_sampled: 509000
    num_agent_steps_trained: 509000
    num_steps_sampled: 509000
    num_steps_trained: 509000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,509,12530.9,509000,-2.7991,-2.07,-7.2,279.91


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 510000
  custom_metrics: {}
  date: 2021-10-24_11-37-12
  done: false
  episode_len_mean: 279.77
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.797699999999985
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1707
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4302698896551104e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.546635823448499
          entropy_coeff: 0.009999999999999998
          kl: 0.00461665113557817
          policy_loss: -0.1115984102918042
          total_loss: -0.1082976236111588
          vf_explained_var: 0.6549428105354309
          vf_loss: 0.0087670819937355
    num_agent_steps_sampled: 510000
    num_agent_steps_trained: 510000
    num_steps_sampled: 510000
    num_steps_trained: 510000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,510,12557.3,510000,-2.7977,-2.07,-7.2,279.77




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 511000
  custom_metrics: {}
  date: 2021-10-24_11-37-56
  done: false
  episode_len_mean: 279.07
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.7906999999999846
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 5
  episodes_total: 1712
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.151349448275552e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7470070365402434
          entropy_coeff: 0.009999999999999998
          kl: 0.05650336075123712
          policy_loss: -0.03273107194238239
          total_loss: -0.03036438851720757
          vf_explained_var: 0.6511140465736389
          vf_loss: 0.00983635178870625
    num_agent_steps_sampled: 511000
    num_agent_steps_trained: 511000
    num_steps_sampled: 511000
    num_steps_trained: 511000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,511,12601.9,511000,-2.7907,-2.07,-7.2,279.07


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 512000
  custom_metrics: {}
  date: 2021-10-24_11-38-18
  done: false
  episode_len_mean: 281.33
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.813299999999984
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 3
  episodes_total: 1715
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0727024172413331e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.0401169366306728
          entropy_coeff: 0.009999999999999998
          kl: 0.029616741518847967
          policy_loss: 0.05671158838603232
          total_loss: 0.056049118604924945
          vf_explained_var: 0.48103293776512146
          vf_loss: 0.009738381293654027
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_steps_sampled: 512000
    num_steps_trained: 512000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,512,12623.7,512000,-2.8133,-2.07,-7.2,281.33


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 513000
  custom_metrics: {}
  date: 2021-10-24_11-38-43
  done: false
  episode_len_mean: 283.02
  episode_media: {}
  episode_reward_max: -2.07
  episode_reward_mean: -2.830199999999983
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1719
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6090536258619992e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.9091393742296431
          entropy_coeff: 0.009999999999999998
          kl: 0.015480545332488226
          policy_loss: -0.037102470298608144
          total_loss: -0.03662263875206311
          vf_explained_var: 0.4660647213459015
          vf_loss: 0.009570977619538705
    num_agent_steps_sampled: 513000
    num_agent_steps_trained: 513000
    num_steps_sampled: 513000
    num_steps_trained: 513000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,513,12648,513000,-2.8302,-2.07,-7.2,283.02


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 514000
  custom_metrics: {}
  date: 2021-10-24_11-39-07
  done: false
  episode_len_mean: 283.39
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.8338999999999834
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 3
  episodes_total: 1722
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6090536258619992e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.6442011912663778
          entropy_coeff: 0.009999999999999998
          kl: 0.027046378949677912
          policy_loss: -0.01796595172749625
          total_loss: -0.016845795181062488
          vf_explained_var: 0.2959069013595581
          vf_loss: 0.007561734856830703
    num_agent_steps_sampled: 514000
    num_agent_steps_trained: 514000
    num_steps_sampled: 514000
    num_steps_trained: 5140

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,514,12672.7,514000,-2.8339,-2.1,-7.2,283.39


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 515000
  custom_metrics: {}
  date: 2021-10-24_11-39-33
  done: false
  episode_len_mean: 282.64
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.8263999999999836
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1726
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4135804387929995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5345854749282201
          entropy_coeff: 0.009999999999999998
          kl: 0.008452983001497058
          policy_loss: -0.05268642587794198
          total_loss: -0.04670993420812819
          vf_explained_var: 0.13543978333473206
          vf_loss: 0.011322138820671374
    num_agent_steps_sampled: 515000
    num_agent_steps_trained: 515000
    num_steps_sampled: 515000
    num_steps_trained: 5150

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,515,12698.5,515000,-2.8264,-2.1,-7.2,282.64


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 516000
  custom_metrics: {}
  date: 2021-10-24_11-39-58
  done: false
  episode_len_mean: 281.93
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.8192999999999837
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1730
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4135804387929995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5613226572672526
          entropy_coeff: 0.009999999999999998
          kl: 0.006987334078450402
          policy_loss: 0.019066004951794942
          total_loss: 0.024970272928476332
          vf_explained_var: 0.20171453058719635
          vf_loss: 0.011517327082239919
    num_agent_steps_sampled: 516000
    num_agent_steps_trained: 516000
    num_steps_sampled: 516000
    num_steps_trained: 5160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,516,12723,516000,-2.8193,-2.1,-7.2,281.93


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 517000
  custom_metrics: {}
  date: 2021-10-24_11-40-23
  done: false
  episode_len_mean: 282.53
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.825299999999984
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1734
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4135804387929995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5278113229407204
          entropy_coeff: 0.009999999999999998
          kl: 0.006968096166348184
          policy_loss: 0.026459303663836586
          total_loss: 0.032841997841993965
          vf_explained_var: 0.2744468152523041
          vf_loss: 0.011660640676402384
    num_agent_steps_sampled: 517000
    num_agent_steps_trained: 517000
    num_steps_sampled: 517000
    num_steps_trained: 517000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,517,12748.9,517000,-2.8253,-2.1,-7.2,282.53


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 518000
  custom_metrics: {}
  date: 2021-10-24_11-40-50
  done: false
  episode_len_mean: 282.91
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.829099999999984
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1738
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4135804387929995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5880721588929494
          entropy_coeff: 0.009999999999999998
          kl: 0.006478734756539714
          policy_loss: 0.04943749482433001
          total_loss: 0.05352915525436401
          vf_explained_var: 0.20379772782325745
          vf_loss: 0.009972228823850552
    num_agent_steps_sampled: 518000
    num_agent_steps_trained: 518000
    num_steps_sampled: 518000
    num_steps_trained: 518000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,518,12775.1,518000,-2.8291,-2.1,-7.2,282.91




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 519000
  custom_metrics: {}
  date: 2021-10-24_11-41-34
  done: false
  episode_len_mean: 283.25
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.832499999999983
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1742
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4135804387929995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.5087800804111693
          entropy_coeff: 0.009999999999999998
          kl: 0.005554524547079708
          policy_loss: 0.0031841983397801715
          total_loss: 0.010487062401241726
          vf_explained_var: 0.1700788140296936
          vf_loss: 0.01239053150638938
    num_agent_steps_sampled: 519000
    num_agent_steps_trained: 519000
    num_steps_sampled: 519000
    num_steps_trained: 519000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,519,12819,519000,-2.8325,-2.1,-7.2,283.25


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 520000
  custom_metrics: {}
  date: 2021-10-24_11-42-00
  done: false
  episode_len_mean: 283.59
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.835899999999983
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1746
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4135804387929995e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.4914638032515844
          entropy_coeff: 0.009999999999999998
          kl: 0.0028549972748696147
          policy_loss: -0.038887200835678315
          total_loss: -0.03123098640806145
          vf_explained_var: 0.19321714341640472
          vf_loss: 0.012570790211773581
    num_agent_steps_sampled: 520000
    num_agent_steps_trained: 520000
    num_steps_sampled: 520000
    num_steps_trained: 520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,520,12845.5,520000,-2.8359,-2.1,-7.2,283.59


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 521000
  custom_metrics: {}
  date: 2021-10-24_11-42-27
  done: false
  episode_len_mean: 284.15
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.841499999999984
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1750
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2067902193964997e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.3937614576684104
          entropy_coeff: 0.009999999999999998
          kl: 0.0032912605082958952
          policy_loss: -0.019486629135078853
          total_loss: -0.011488788326581319
          vf_explained_var: 0.22730326652526855
          vf_loss: 0.011935420779304371
    num_agent_steps_sampled: 521000
    num_agent_steps_trained: 521000
    num_steps_sampled: 521000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,521,12872.1,521000,-2.8415,-2.14,-7.2,284.15


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 522000
  custom_metrics: {}
  date: 2021-10-24_11-42-54
  done: false
  episode_len_mean: 283.22
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.832199999999984
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 5
  episodes_total: 1755
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.033951096982499e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.42976332240634496
          entropy_coeff: 0.009999999999999998
          kl: 0.0035569313001869002
          policy_loss: -0.026712718192074034
          total_loss: -0.016163201878468196
          vf_explained_var: 0.2447114884853363
          vf_loss: 0.01484713399161895
    num_agent_steps_sampled: 522000
    num_agent_steps_trained: 522000
    num_steps_sampled: 522000
    num_steps_trained: 522

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,522,12899.2,522000,-2.8322,-2.14,-7.2,283.22


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 523000
  custom_metrics: {}
  date: 2021-10-24_11-43-20
  done: false
  episode_len_mean: 282.24
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.8223999999999836
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1759
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0169755484912494e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.4798580762412813
          entropy_coeff: 0.009999999999999998
          kl: 0.003567464512506212
          policy_loss: 0.0031117899550331964
          total_loss: 0.00984689270456632
          vf_explained_var: 0.2931298017501831
          vf_loss: 0.011533676243076722
    num_agent_steps_sampled: 523000
    num_agent_steps_trained: 523000
    num_steps_sampled: 523000
    num_steps_trained: 5230

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,523,12925.4,523000,-2.8224,-2.14,-7.2,282.24


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 524000
  custom_metrics: {}
  date: 2021-10-24_11-43-46
  done: false
  episode_len_mean: 281.55
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.8154999999999837
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1763
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5084877742456247e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.43870586786005233
          entropy_coeff: 0.009999999999999998
          kl: 0.003848815130185004
          policy_loss: 0.023222956765029164
          total_loss: 0.030331565770837995
          vf_explained_var: 0.2551701068878174
          vf_loss: 0.011495662935905987
    num_agent_steps_sampled: 524000
    num_agent_steps_trained: 524000
    num_steps_sampled: 524000
    num_steps_trained: 524

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,524,12951.4,524000,-2.8155,-2.14,-7.2,281.55


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 525000
  custom_metrics: {}
  date: 2021-10-24_11-44-13
  done: false
  episode_len_mean: 279.39
  episode_media: {}
  episode_reward_max: -2.1399999999999983
  episode_reward_mean: -2.793899999999984
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1767
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.542438871228123e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.40610863599512315
          entropy_coeff: 0.009999999999999998
          kl: 0.00709768048187682
          policy_loss: 0.021402665393220055
          total_loss: 0.029280959235297308
          vf_explained_var: 0.16743570566177368
          vf_loss: 0.011939376468459766
    num_agent_steps_sampled: 525000
    num_agent_steps_trained: 525000
    num_steps_sampled: 525000
    num_steps_trained: 52500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,525,12977.8,525000,-2.7939,-2.14,-7.2,279.39




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 526000
  custom_metrics: {}
  date: 2021-10-24_11-44-57
  done: false
  episode_len_mean: 277.32
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.7731999999999846
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1771
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.542438871228123e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.3826733777920405
          entropy_coeff: 0.009999999999999998
          kl: 0.003852732967837142
          policy_loss: 0.024267968038717905
          total_loss: 0.033416472044256
          vf_explained_var: 0.21072015166282654
          vf_loss: 0.012975231589128574
    num_agent_steps_sampled: 526000
    num_agent_steps_trained: 526000
    num_steps_sampled: 526000
    num_steps_trained: 526000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,526,13021.8,526000,-2.7732,-2.1,-7.2,277.32


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 527000
  custom_metrics: {}
  date: 2021-10-24_11-45-22
  done: false
  episode_len_mean: 277.27
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.772699999999985
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1775
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7712194356140617e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.4909848183393478
          entropy_coeff: 0.009999999999999998
          kl: 0.006904163815742908
          policy_loss: 0.03287467120422257
          total_loss: 0.04069381695654657
          vf_explained_var: 0.24961568415164948
          vf_loss: 0.012728990045272642
    num_agent_steps_sampled: 527000
    num_agent_steps_trained: 527000
    num_steps_sampled: 527000
    num_steps_trained: 527000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,527,13047.3,527000,-2.7727,-2.1,-7.2,277.27


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 528000
  custom_metrics: {}
  date: 2021-10-24_11-45-47
  done: false
  episode_len_mean: 274.8
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.747999999999985
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 3
  episodes_total: 1778
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7712194356140617e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.4340831786394119
          entropy_coeff: 0.009999999999999998
          kl: 0.003859598919983398
          policy_loss: -0.06746382663647334
          total_loss: -0.06128900249799093
          vf_explained_var: 0.33014747500419617
          vf_loss: 0.010515658769549595
    num_agent_steps_sampled: 528000
    num_agent_steps_trained: 528000
    num_steps_sampled: 528000
    num_steps_trained: 528000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,528,13072,528000,-2.748,-2.1,-7.2,274.8


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 529000
  custom_metrics: {}
  date: 2021-10-24_11-46-14
  done: false
  episode_len_mean: 271.0
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.7099999999999858
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 5
  episodes_total: 1783
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8856097178070309e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.356591812438435
          entropy_coeff: 0.009999999999999998
          kl: 0.003763500468180483
          policy_loss: -0.021184054182635415
          total_loss: -0.010329408198595047
          vf_explained_var: 0.31189092993736267
          vf_loss: 0.01442056361378895
    num_agent_steps_sampled: 529000
    num_agent_steps_trained: 529000
    num_steps_sampled: 529000
    num_steps_trained: 52900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,529,13099.1,529000,-2.71,-2.1,-7.2,271


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 530000
  custom_metrics: {}
  date: 2021-10-24_11-46-40
  done: false
  episode_len_mean: 270.24
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.7023999999999857
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1787
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.428048589035154e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.41544548604223464
          entropy_coeff: 0.009999999999999998
          kl: 0.017769706343942933
          policy_loss: 0.04135315641760826
          total_loss: 0.04842228425873651
          vf_explained_var: 0.2758355736732483
          vf_loss: 0.011223585003366073
    num_agent_steps_sampled: 530000
    num_agent_steps_trained: 530000
    num_steps_sampled: 530000
    num_steps_trained: 530000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,530,13125.3,530000,-2.7024,-2.1,-7.2,270.24


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 531000
  custom_metrics: {}
  date: 2021-10-24_11-47-07
  done: false
  episode_len_mean: 269.27
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6926999999999857
  episode_reward_min: -7.199999999999891
  episodes_this_iter: 4
  episodes_total: 1791
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.428048589035154e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.3492736793226666
          entropy_coeff: 0.009999999999999998
          kl: 0.005884064363055093
          policy_loss: 0.02551363284389178
          total_loss: 0.03242107662889693
          vf_explained_var: 0.23813743889331818
          vf_loss: 0.010400180611759425
    num_agent_steps_sampled: 531000
    num_agent_steps_trained: 531000
    num_steps_sampled: 531000
    num_steps_trained: 531000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,531,13152.6,531000,-2.6927,-2.1,-7.2,269.27


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 532000
  custom_metrics: {}
  date: 2021-10-24_11-47-35
  done: false
  episode_len_mean: 256.24
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5623999999999887
  episode_reward_min: -6.099999999999914
  episodes_this_iter: 4
  episodes_total: 1795
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.428048589035154e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.279927874273724
          entropy_coeff: 0.009999999999999998
          kl: 0.0011049076717178253
          policy_loss: 0.03717425440748533
          total_loss: 0.0442720723648866
          vf_explained_var: 0.31777575612068176
          vf_loss: 0.009897098576443063
    num_agent_steps_sampled: 532000
    num_agent_steps_trained: 532000
    num_steps_sampled: 532000
    num_steps_trained: 532000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,532,13180,532000,-2.5624,-2.1,-6.1,256.24


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 533000
  custom_metrics: {}
  date: 2021-10-24_11-48-02
  done: false
  episode_len_mean: 251.75
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.5174999999999903
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 4
  episodes_total: 1799
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.714024294517577e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.3095596821771728
          entropy_coeff: 0.009999999999999998
          kl: 0.01417159454063371
          policy_loss: 0.039041924228270845
          total_loss: 0.04681660665406121
          vf_explained_var: 0.1595897525548935
          vf_loss: 0.010870275563663906
    num_agent_steps_sampled: 533000
    num_agent_steps_trained: 533000
    num_steps_sampled: 533000
    num_steps_trained: 533000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,533,13206.7,533000,-2.5175,-2.1,-3.28,251.75




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 534000
  custom_metrics: {}
  date: 2021-10-24_11-48-46
  done: false
  episode_len_mean: 250.16
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5015999999999905
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 4
  episodes_total: 1803
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.714024294517577e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.3556648913356993
          entropy_coeff: 0.009999999999999998
          kl: 0.0043478186774519555
          policy_loss: -0.11816834641827477
          total_loss: -0.10717567884259754
          vf_explained_var: 0.17843949794769287
          vf_loss: 0.014549316320982244
    num_agent_steps_sampled: 534000
    num_agent_steps_trained: 534000
    num_steps_sampled: 534000
    num_steps_trained: 534

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,534,13251.3,534000,-2.5016,-2.05,-3.28,250.16


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 535000
  custom_metrics: {}
  date: 2021-10-24_11-49-13
  done: false
  episode_len_mean: 249.57
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.495699999999991
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 5
  episodes_total: 1808
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3570121472587886e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.34927790694766575
          entropy_coeff: 0.009999999999999998
          kl: 0.0021632155923681
          policy_loss: -0.005880419909954071
          total_loss: 0.0027054035001330906
          vf_explained_var: 0.3144417405128479
          vf_loss: 0.012078602467146185
    num_agent_steps_sampled: 535000
    num_agent_steps_trained: 535000
    num_steps_sampled: 535000
    num_steps_trained: 5350

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,535,13278,535000,-2.4957,-2.05,-3.28,249.57


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 536000
  custom_metrics: {}
  date: 2021-10-24_11-49-40
  done: false
  episode_len_mean: 249.54
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4953999999999903
  episode_reward_min: -3.279999999999974
  episodes_this_iter: 4
  episodes_total: 1812
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1785060736293943e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.2978777766227722
          entropy_coeff: 0.009999999999999998
          kl: 0.002371944386834931
          policy_loss: 0.032848734574185474
          total_loss: 0.0404470719397068
          vf_explained_var: 0.23579542338848114
          vf_loss: 0.010577118769288064
    num_agent_steps_sampled: 536000
    num_agent_steps_trained: 536000
    num_steps_sampled: 536000
    num_steps_trained: 53600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,536,13305,536000,-2.4954,-2.05,-3.28,249.54


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 537000
  custom_metrics: {}
  date: 2021-10-24_11-50-08
  done: false
  episode_len_mean: 247.16
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.471599999999991
  episode_reward_min: -3.0299999999999794
  episodes_this_iter: 4
  episodes_total: 1816
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.892530368146971e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.38387033840020496
          entropy_coeff: 0.009999999999999998
          kl: 0.01674960092462467
          policy_loss: 0.023553132265806197
          total_loss: 0.030994803375667995
          vf_explained_var: 0.3006855845451355
          vf_loss: 0.011280376826309495
    num_agent_steps_sampled: 537000
    num_agent_steps_trained: 537000
    num_steps_sampled: 537000
    num_steps_trained: 53700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,537,13332.5,537000,-2.4716,-2.05,-3.03,247.16


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 538000
  custom_metrics: {}
  date: 2021-10-24_11-50-35
  done: false
  episode_len_mean: 245.66
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.456599999999992
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 1820
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.892530368146971e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.3636863440275192
          entropy_coeff: 0.009999999999999998
          kl: 0.009654761817122904
          policy_loss: 0.012501424468225903
          total_loss: 0.020901565584871502
          vf_explained_var: 0.2576420307159424
          vf_loss: 0.012037004778782527
    num_agent_steps_sampled: 538000
    num_agent_steps_trained: 538000
    num_steps_sampled: 538000
    num_steps_trained: 53800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,538,13359.7,538000,-2.4566,-2.05,-3.02,245.66


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 539000
  custom_metrics: {}
  date: 2021-10-24_11-51-00
  done: false
  episode_len_mean: 245.22
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4521999999999915
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 1824
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.892530368146971e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.4174816442860497
          entropy_coeff: 0.009999999999999998
          kl: 0.008379686143922843
          policy_loss: -0.013994506042864587
          total_loss: -0.005993121200137668
          vf_explained_var: 0.2703399360179901
          vf_loss: 0.012176197953522205
    num_agent_steps_sampled: 539000
    num_agent_steps_trained: 539000
    num_steps_sampled: 539000
    num_steps_trained: 53

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,539,13385.3,539000,-2.4522,-2.05,-3.02,245.22


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 540000
  custom_metrics: {}
  date: 2021-10-24_11-51-26
  done: false
  episode_len_mean: 245.43
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4542999999999915
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 1828
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.892530368146971e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.37938892195622126
          entropy_coeff: 0.009999999999999998
          kl: 0.06588922350077969
          policy_loss: 0.030340205836627217
          total_loss: 0.03854890722367499
          vf_explained_var: 0.27705681324005127
          vf_loss: 0.012002590640137593
    num_agent_steps_sampled: 540000
    num_agent_steps_trained: 540000
    num_steps_sampled: 540000
    num_steps_trained: 5400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,540,13411.4,540000,-2.4543,-2.05,-3.02,245.43




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 541000
  custom_metrics: {}
  date: 2021-10-24_11-52-11
  done: false
  episode_len_mean: 243.98
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4397999999999915
  episode_reward_min: -3.0199999999999796
  episodes_this_iter: 4
  episodes_total: 1832
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.838795552220456e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.46678643822669985
          entropy_coeff: 0.009999999999999998
          kl: 0.02260368910233655
          policy_loss: -0.02419080932935079
          total_loss: -0.015649058669805527
          vf_explained_var: 0.17632834613323212
          vf_loss: 0.013209615958233674
    num_agent_steps_sampled: 541000
    num_agent_steps_trained: 541000
    num_steps_sampled: 541000
    num_steps_trained: 54

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,541,13455.4,541000,-2.4398,-2.05,-3.02,243.98


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 542000
  custom_metrics: {}
  date: 2021-10-24_11-52-32
  done: false
  episode_len_mean: 246.39
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.4638999999999913
  episode_reward_min: -3.5299999999999687
  episodes_this_iter: 4
  episodes_total: 1836
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3258193328330684e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7064839508798388
          entropy_coeff: 0.009999999999999998
          kl: 0.0703018902031744
          policy_loss: 0.002082027701867951
          total_loss: -0.0035419565522008473
          vf_explained_var: 0.419649213552475
          vf_loss: 0.011440856558167272
    num_agent_steps_sampled: 542000
    num_agent_steps_trained: 542000
    num_steps_sampled: 542000
    num_steps_trained: 5420

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,542,13476.9,542000,-2.4639,-2.05,-3.53,246.39


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 543000
  custom_metrics: {}
  date: 2021-10-24_11-52-52
  done: false
  episode_len_mean: 248.21
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.482099999999991
  episode_reward_min: -3.5899999999999674
  episodes_this_iter: 2
  episodes_total: 1838
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9887289992496024e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.432093448109097
          entropy_coeff: 0.009999999999999998
          kl: 0.012008428284903654
          policy_loss: -0.06298018246889114
          total_loss: -0.07154181351264317
          vf_explained_var: 0.7925750613212585
          vf_loss: 0.0057592994611089425
    num_agent_steps_sampled: 543000
    num_agent_steps_trained: 543000
    num_steps_sampled: 543000
    num_steps_trained: 5430

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,543,13496.4,543000,-2.4821,-2.05,-3.59,248.21


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 544000
  custom_metrics: {}
  date: 2021-10-24_11-53-11
  done: false
  episode_len_mean: 251.27
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5126999999999904
  episode_reward_min: -3.7799999999999634
  episodes_this_iter: 3
  episodes_total: 1841
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9887289992496024e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5931327435705396
          entropy_coeff: 0.009999999999999998
          kl: 0.022295348239224595
          policy_loss: -0.08277638604243596
          total_loss: -0.09169309267567263
          vf_explained_var: 0.7621487975120544
          vf_loss: 0.007014621597611242
    num_agent_steps_sampled: 544000
    num_agent_steps_trained: 544000
    num_steps_sampled: 544000
    num_steps_trained: 544

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,544,13516.3,544000,-2.5127,-2.05,-3.78,251.27


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 545000
  custom_metrics: {}
  date: 2021-10-24_11-53-34
  done: false
  episode_len_mean: 254.14
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5413999999999897
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1845
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.983093498874404e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1418471104568906
          entropy_coeff: 0.009999999999999998
          kl: 0.02626226185013919
          policy_loss: 0.07172264895505376
          total_loss: 0.06597763101259867
          vf_explained_var: 0.8326097130775452
          vf_loss: 0.005673453527399236
    num_agent_steps_sampled: 545000
    num_agent_steps_trained: 545000
    num_steps_sampled: 545000
    num_steps_trained: 545000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,545,13538.4,545000,-2.5414,-2.05,-3.98,254.14


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 546000
  custom_metrics: {}
  date: 2021-10-24_11-53-59
  done: false
  episode_len_mean: 254.4
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5439999999999894
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 3
  episodes_total: 1848
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.474640248311605e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.682203949160046
          entropy_coeff: 0.009999999999999998
          kl: 0.009956177743134937
          policy_loss: -0.11100116819143295
          total_loss: -0.11245684151848158
          vf_explained_var: 0.83698570728302
          vf_loss: 0.005366367986425757
    num_agent_steps_sampled: 546000
    num_agent_steps_trained: 546000
    num_steps_sampled: 546000
    num_steps_trained: 546000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,546,13564.1,546000,-2.544,-2.05,-3.98,254.4


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 547000
  custom_metrics: {}
  date: 2021-10-24_11-54-24
  done: false
  episode_len_mean: 255.14
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.55139999999999
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1852
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.474640248311605e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8785664631260766
          entropy_coeff: 0.009999999999999998
          kl: 0.008848443607329809
          policy_loss: 0.01970855188038614
          total_loss: 0.015604543023639256
          vf_explained_var: 0.8395980596542358
          vf_loss: 0.00468165589393013
    num_agent_steps_sampled: 547000
    num_agent_steps_trained: 547000
    num_steps_sampled: 547000
    num_steps_trained: 547000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,547,13588.9,547000,-2.5514,-2.05,-3.98,255.14


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 548000
  custom_metrics: {}
  date: 2021-10-24_11-54-49
  done: false
  episode_len_mean: 255.93
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5592999999999893
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1856
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.474640248311605e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6515689724021487
          entropy_coeff: 0.009999999999999998
          kl: 0.005462459130952861
          policy_loss: 0.030607139526142014
          total_loss: 0.031001874307791392
          vf_explained_var: 0.719222366809845
          vf_loss: 0.006910422465039624
    num_agent_steps_sampled: 548000
    num_agent_steps_trained: 548000
    num_steps_sampled: 548000
    num_steps_trained: 548000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,548,13614.1,548000,-2.5593,-2.05,-3.98,255.93




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 549000
  custom_metrics: {}
  date: 2021-10-24_11-55-32
  done: false
  episode_len_mean: 256.13
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.561299999999989
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1860
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.474640248311605e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6216984113057454
          entropy_coeff: 0.009999999999999998
          kl: 0.0073547290177239846
          policy_loss: 0.022236015937394566
          total_loss: 0.023632401848832765
          vf_explained_var: 0.6140222549438477
          vf_loss: 0.007613371695495314
    num_agent_steps_sampled: 549000
    num_agent_steps_trained: 549000
    num_steps_sampled: 549000
    num_steps_trained: 54900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,549,13656.3,549000,-2.5613,-2.05,-3.98,256.13


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 550000
  custom_metrics: {}
  date: 2021-10-24_11-55-58
  done: false
  episode_len_mean: 256.64
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.566399999999989
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1864
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.474640248311605e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8887571414311727
          entropy_coeff: 0.009999999999999998
          kl: 0.004074078619960315
          policy_loss: 0.046714900268448725
          total_loss: 0.04703953920139207
          vf_explained_var: 0.3843154013156891
          vf_loss: 0.009212212078273297
    num_agent_steps_sampled: 550000
    num_agent_steps_trained: 550000
    num_steps_sampled: 550000
    num_steps_trained: 550000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,550,13683,550000,-2.5664,-2.05,-3.98,256.64


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 551000
  custom_metrics: {}
  date: 2021-10-24_11-56-24
  done: false
  episode_len_mean: 257.16
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.571599999999989
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1868
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2373201241558026e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8287661850452424
          entropy_coeff: 0.009999999999999998
          kl: 0.010545532989194584
          policy_loss: 0.024056573522587617
          total_loss: 0.024296468123793602
          vf_explained_var: 0.5154550075531006
          vf_loss: 0.008527555151118173
    num_agent_steps_sampled: 551000
    num_agent_steps_trained: 551000
    num_steps_sampled: 551000
    num_steps_trained: 55100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,551,13708.2,551000,-2.5716,-2.05,-3.98,257.16


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 552000
  custom_metrics: {}
  date: 2021-10-24_11-56-51
  done: false
  episode_len_mean: 256.99
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5698999999999894
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1872
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2373201241558026e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.4735585242509842
          entropy_coeff: 0.009999999999999998
          kl: 0.04015854026742976
          policy_loss: 0.030096863706906635
          total_loss: 0.034673677302069134
          vf_explained_var: 0.35856014490127563
          vf_loss: 0.009312400221824646
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_steps_sampled: 552000
    num_steps_trained: 5520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,552,13735.4,552000,-2.5699,-2.05,-3.98,256.99


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 553000
  custom_metrics: {}
  date: 2021-10-24_11-57-18
  done: false
  episode_len_mean: 255.7
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5569999999999897
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1876
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3559801862337036e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.43757112092441985
          entropy_coeff: 0.009999999999999998
          kl: 0.0015073330261004337
          policy_loss: 0.009423713137706121
          total_loss: 0.014293384220865037
          vf_explained_var: 0.37402060627937317
          vf_loss: 0.009245382042394744
    num_agent_steps_sampled: 553000
    num_agent_steps_trained: 553000
    num_steps_sampled: 553000
    num_steps_trained: 55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,553,13762.2,553000,-2.557,-2.05,-3.98,255.7


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 554000
  custom_metrics: {}
  date: 2021-10-24_11-57-44
  done: false
  episode_len_mean: 255.29
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5528999999999895
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1880
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6779900931168518e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.525338128540251
          entropy_coeff: 0.009999999999999998
          kl: 0.04732078049320852
          policy_loss: -0.0849457243250476
          total_loss: -0.08099377411935064
          vf_explained_var: 0.38728469610214233
          vf_loss: 0.009205329418182372
    num_agent_steps_sampled: 554000
    num_agent_steps_trained: 554000
    num_steps_sampled: 554000
    num_steps_trained: 554000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,554,13789,554000,-2.5529,-2.05,-3.98,255.29


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 555000
  custom_metrics: {}
  date: 2021-10-24_11-58-11
  done: false
  episode_len_mean: 255.18
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.55179999999999
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 5
  episodes_total: 1885
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.516985139675278e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.4531922909948561
          entropy_coeff: 0.009999999999999998
          kl: 0.009700516093441535
          policy_loss: -0.0331960620979468
          total_loss: -0.026992815732955932
          vf_explained_var: 0.44821515679359436
          vf_loss: 0.010735169922312101
    num_agent_steps_sampled: 555000
    num_agent_steps_trained: 555000
    num_steps_sampled: 555000
    num_steps_trained: 555000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,555,13816.1,555000,-2.5518,-2.05,-3.98,255.18


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 556000
  custom_metrics: {}
  date: 2021-10-24_11-58-38
  done: false
  episode_len_mean: 255.14
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5513999999999895
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1889
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.516985139675278e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5164819406138526
          entropy_coeff: 0.009999999999999998
          kl: 0.07420342101316414
          policy_loss: -0.00827166193889247
          total_loss: -0.004716307214564747
          vf_explained_var: 0.4201187491416931
          vf_loss: 0.008720171311870218
    num_agent_steps_sampled: 556000
    num_agent_steps_trained: 556000
    num_steps_sampled: 556000
    num_steps_trained: 55600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,556,13842.8,556000,-2.5514,-2.05,-3.98,255.14




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 557000
  custom_metrics: {}
  date: 2021-10-24_11-59-21
  done: false
  episode_len_mean: 255.67
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5566999999999895
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1893
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7754777095129166e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6820924205912484
          entropy_coeff: 0.009999999999999998
          kl: 0.016394527360354714
          policy_loss: 0.04664570829934544
          total_loss: 0.0491195195251041
          vf_explained_var: 0.3460451066493988
          vf_loss: 0.009294736271517143
    num_agent_steps_sampled: 557000
    num_agent_steps_trained: 557000
    num_steps_sampled: 557000
    num_steps_trained: 557000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,557,13885.3,557000,-2.5567,-2.05,-3.98,255.67


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 558000
  custom_metrics: {}
  date: 2021-10-24_11-59-46
  done: false
  episode_len_mean: 256.14
  episode_media: {}
  episode_reward_max: -2.0500000000000003
  episode_reward_mean: -2.5613999999999892
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1897
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7754777095129166e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7354574667082893
          entropy_coeff: 0.009999999999999998
          kl: 0.013001473213118212
          policy_loss: 0.035805812726418175
          total_loss: 0.03653710107836458
          vf_explained_var: 0.17765267193317413
          vf_loss: 0.008085859848910736
    num_agent_steps_sampled: 558000
    num_agent_steps_trained: 558000
    num_steps_sampled: 558000
    num_steps_trained: 5580

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,558,13911,558000,-2.5614,-2.05,-3.98,256.14


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 559000
  custom_metrics: {}
  date: 2021-10-24_12-00-12
  done: false
  episode_len_mean: 257.09
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.570899999999989
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 3
  episodes_total: 1900
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7754777095129166e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7474357181125217
          entropy_coeff: 0.009999999999999998
          kl: 0.006759250928676784
          policy_loss: -0.09650418501761225
          total_loss: -0.09275311157107353
          vf_explained_var: 0.0973237007856369
          vf_loss: 0.011225434361646573
    num_agent_steps_sampled: 559000
    num_agent_steps_trained: 559000
    num_steps_sampled: 559000
    num_steps_trained: 55900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,559,13936.4,559000,-2.5709,-2.13,-3.98,257.09


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 560000
  custom_metrics: {}
  date: 2021-10-24_12-00-37
  done: false
  episode_len_mean: 257.75
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.5774999999999886
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1904
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7754777095129166e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6861522959338294
          entropy_coeff: 0.009999999999999998
          kl: 0.004306217276189747
          policy_loss: -0.040329607865876624
          total_loss: -0.035168773949974114
          vf_explained_var: 0.10343922674655914
          vf_loss: 0.012022358965542582
    num_agent_steps_sampled: 560000
    num_agent_steps_trained: 560000
    num_steps_sampled: 560000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,560,13962,560000,-2.5775,-2.13,-3.98,257.75


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 561000
  custom_metrics: {}
  date: 2021-10-24_12-01-03
  done: false
  episode_len_mean: 258.22
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.5821999999999887
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1908
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8877388547564583e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6716618822680579
          entropy_coeff: 0.009999999999999998
          kl: 0.003090883992428041
          policy_loss: -0.027518025868468816
          total_loss: -0.021839663262168567
          vf_explained_var: 0.1264062523841858
          vf_loss: 0.012394981065558062
    num_agent_steps_sampled: 561000
    num_agent_steps_trained: 561000
    num_steps_sampled: 561000
    num_steps_trained: 56

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,561,13987.5,561000,-2.5822,-2.13,-3.98,258.22


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 562000
  custom_metrics: {}
  date: 2021-10-24_12-01-28
  done: false
  episode_len_mean: 259.03
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.5902999999999885
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1912
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.438694273782292e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6941922995779249
          entropy_coeff: 0.009999999999999998
          kl: 0.010349722063098138
          policy_loss: -0.002494229707452986
          total_loss: 0.003461757302284241
          vf_explained_var: 0.09892917424440384
          vf_loss: 0.012897908522023094
    num_agent_steps_sampled: 562000
    num_agent_steps_trained: 562000
    num_steps_sampled: 562000
    num_steps_trained: 562

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,562,14012.6,562000,-2.5903,-2.13,-3.98,259.03


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 563000
  custom_metrics: {}
  date: 2021-10-24_12-01-54
  done: false
  episode_len_mean: 259.61
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.5960999999999883
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1916
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.438694273782292e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5737013873126772
          entropy_coeff: 0.009999999999999998
          kl: 0.00809524835715886
          policy_loss: 0.005197364423010085
          total_loss: 0.012629632362061077
          vf_explained_var: 0.10711085796356201
          vf_loss: 0.013169282685137458
    num_agent_steps_sampled: 563000
    num_agent_steps_trained: 563000
    num_steps_sampled: 563000
    num_steps_trained: 56300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,563,14038.6,563000,-2.5961,-2.13,-3.98,259.61




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 564000
  custom_metrics: {}
  date: 2021-10-24_12-02-37
  done: false
  episode_len_mean: 259.73
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.597299999999988
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1920
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.438694273782292e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.5553179615073733
          entropy_coeff: 0.009999999999999998
          kl: 0.012892313556046985
          policy_loss: -0.0729796924524837
          total_loss: -0.06519451629784372
          vf_explained_var: 0.11614947766065598
          vf_loss: 0.01333835870027542
    num_agent_steps_sampled: 564000
    num_agent_steps_trained: 564000
    num_steps_sampled: 564000
    num_steps_trained: 564000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,564,14081.1,564000,-2.5973,-2.13,-3.98,259.73


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 565000
  custom_metrics: {}
  date: 2021-10-24_12-03-01
  done: false
  episode_len_mean: 260.62
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.6061999999999883
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1924
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.438694273782292e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8524296320146985
          entropy_coeff: 0.009999999999999998
          kl: 0.009770090049369963
          policy_loss: 0.016825306332773633
          total_loss: 0.022282702310217752
          vf_explained_var: 0.16203758120536804
          vf_loss: 0.013981691561639309
    num_agent_steps_sampled: 565000
    num_agent_steps_trained: 565000
    num_steps_sampled: 565000
    num_steps_trained: 5650

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,565,14105.7,565000,-2.6062,-2.13,-3.98,260.62


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 566000
  custom_metrics: {}
  date: 2021-10-24_12-03-28
  done: false
  episode_len_mean: 260.14
  episode_media: {}
  episode_reward_max: -2.1299999999999986
  episode_reward_mean: -2.6013999999999884
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1928
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.438694273782292e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.49485656751526724
          entropy_coeff: 0.009999999999999998
          kl: 0.004959033529778544
          policy_loss: 0.031668497870365776
          total_loss: 0.03961907318896717
          vf_explained_var: 0.17286460101604462
          vf_loss: 0.01289914415942298
    num_agent_steps_sampled: 566000
    num_agent_steps_trained: 566000
    num_steps_sampled: 566000
    num_steps_trained: 56600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,566,14132.5,566000,-2.6014,-2.13,-3.98,260.14


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 567000
  custom_metrics: {}
  date: 2021-10-24_12-03-55
  done: false
  episode_len_mean: 260.41
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.604099999999988
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1932
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.719347136891146e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.4171990089946323
          entropy_coeff: 0.009999999999999998
          kl: 0.010920371326678138
          policy_loss: -0.003667505457997322
          total_loss: 0.005387097141808934
          vf_explained_var: 0.1579742431640625
          vf_loss: 0.01322659600733055
    num_agent_steps_sampled: 567000
    num_agent_steps_trained: 567000
    num_steps_sampled: 567000
    num_steps_trained: 567000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,567,14159.6,567000,-2.6041,-2.16,-3.98,260.41


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 568000
  custom_metrics: {}
  date: 2021-10-24_12-04-22
  done: false
  episode_len_mean: 257.94
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.5793999999999886
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1936
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.719347136891146e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.424225910504659
          entropy_coeff: 0.009999999999999998
          kl: 0.008751034944123623
          policy_loss: -0.020509127775828043
          total_loss: -0.012161132941643397
          vf_explained_var: 0.22433693706989288
          vf_loss: 0.012590250714371603
    num_agent_steps_sampled: 568000
    num_agent_steps_trained: 568000
    num_steps_sampled: 568000
    num_steps_trained: 5680

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,568,14186.6,568000,-2.5794,-2.16,-3.98,257.94


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 569000
  custom_metrics: {}
  date: 2021-10-24_12-04-49
  done: false
  episode_len_mean: 254.32
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.5431999999999895
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 1940
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.719347136891146e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.4325216127766503
          entropy_coeff: 0.009999999999999998
          kl: 0.0037709548750576184
          policy_loss: -0.11081975085867776
          total_loss: -0.10030954082806905
          vf_explained_var: 0.2290593832731247
          vf_loss: 0.01483542485576537
    num_agent_steps_sampled: 569000
    num_agent_steps_trained: 569000
    num_steps_sampled: 569000
    num_steps_trained: 569000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,569,14213.4,569000,-2.5432,-2.16,-3.98,254.32


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 570000
  custom_metrics: {}
  date: 2021-10-24_12-05-15
  done: false
  episode_len_mean: 249.98
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.4997999999999907
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 5
  episodes_total: 1945
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.359673568445573e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.4671149641275406
          entropy_coeff: 0.009999999999999998
          kl: 0.005537875417540919
          policy_loss: -0.015977869969275263
          total_loss: -0.006145745515823364
          vf_explained_var: 0.1814754754304886
          vf_loss: 0.014503276958647702
    num_agent_steps_sampled: 570000
    num_agent_steps_trained: 570000
    num_steps_sampled: 570000
    num_steps_trained: 570

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,570,14239.8,570000,-2.4998,-2.16,-2.99,249.98


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 571000
  custom_metrics: {}
  date: 2021-10-24_12-05-42
  done: false
  episode_len_mean: 249.44
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.494399999999991
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 1949
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.359673568445573e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.40743096470832824
          entropy_coeff: 0.009999999999999998
          kl: 0.003252639442402404
          policy_loss: 0.02852627577053176
          total_loss: 0.03645797140068478
          vf_explained_var: 0.17524351179599762
          vf_loss: 0.0120060076419678
    num_agent_steps_sampled: 571000
    num_agent_steps_trained: 571000
    num_steps_sampled: 571000
    num_steps_trained: 571000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,571,14266.2,571000,-2.4944,-2.16,-2.99,249.44




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 572000
  custom_metrics: {}
  date: 2021-10-24_12-06-27
  done: false
  episode_len_mean: 248.65
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.486499999999991
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 1953
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1798367842227865e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6331920428408517
          entropy_coeff: 0.009999999999999998
          kl: 0.01342724084166428
          policy_loss: 0.02231261647409863
          total_loss: 0.029094879743125704
          vf_explained_var: 0.21287128329277039
          vf_loss: 0.013114185341530376
    num_agent_steps_sampled: 572000
    num_agent_steps_trained: 572000
    num_steps_sampled: 572000
    num_steps_trained: 572000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,572,14311,572000,-2.4865,-2.15,-2.99,248.65


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 573000
  custom_metrics: {}
  date: 2021-10-24_12-06-54
  done: false
  episode_len_mean: 247.92
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.4791999999999907
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 1957
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1798367842227865e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.36079031725724536
          entropy_coeff: 0.009999999999999998
          kl: 0.003038981533372641
          policy_loss: 0.033946930203172895
          total_loss: 0.0418365048037635
          vf_explained_var: 0.16878364980220795
          vf_loss: 0.011497475403464503
    num_agent_steps_sampled: 573000
    num_agent_steps_trained: 573000
    num_steps_sampled: 573000
    num_steps_trained: 5730

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,573,14337.8,573000,-2.4792,-2.15,-2.99,247.92


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 574000
  custom_metrics: {}
  date: 2021-10-24_12-07-20
  done: false
  episode_len_mean: 247.57
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.475699999999991
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 1961
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.899183921113932e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.43841793537139895
          entropy_coeff: 0.009999999999999998
          kl: 0.016874546112170683
          policy_loss: 0.030855158468087514
          total_loss: 0.0378107321759065
          vf_explained_var: 0.14101988077163696
          vf_loss: 0.01133975059621864
    num_agent_steps_sampled: 574000
    num_agent_steps_trained: 574000
    num_steps_sampled: 574000
    num_steps_trained: 574000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,574,14364.7,574000,-2.4757,-2.15,-2.99,247.57


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 575000
  custom_metrics: {}
  date: 2021-10-24_12-07-48
  done: false
  episode_len_mean: 246.76
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.467599999999991
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 1965
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.899183921113932e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.35905614925755397
          entropy_coeff: 0.009999999999999998
          kl: 0.0030180746277944574
          policy_loss: -0.04611441464059883
          total_loss: -0.03716706782579422
          vf_explained_var: 0.19752106070518494
          vf_loss: 0.012537906598299741
    num_agent_steps_sampled: 575000
    num_agent_steps_trained: 575000
    num_steps_sampled: 575000
    num_steps_trained: 575

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,575,14391.8,575000,-2.4676,-2.15,-2.99,246.76


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 576000
  custom_metrics: {}
  date: 2021-10-24_12-08-15
  done: false
  episode_len_mean: 245.96
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.459599999999991
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 5
  episodes_total: 1970
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.949591960556966e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.38261829846435125
          entropy_coeff: 0.009999999999999998
          kl: 0.0016950818388494326
          policy_loss: -0.02317191883921623
          total_loss: -0.01172744772500462
          vf_explained_var: 0.2584337592124939
          vf_loss: 0.015270654836462604
    num_agent_steps_sampled: 576000
    num_agent_steps_trained: 576000
    num_steps_sampled: 576000
    num_steps_trained: 5760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,576,14419.3,576000,-2.4596,-2.15,-2.99,245.96


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 577000
  custom_metrics: {}
  date: 2021-10-24_12-08-42
  done: false
  episode_len_mean: 245.81
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.458099999999991
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 1974
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.474795980278483e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.4580278714497884
          entropy_coeff: 0.009999999999999998
          kl: 0.006345702458136613
          policy_loss: 0.023733512229389613
          total_loss: 0.030621149929033385
          vf_explained_var: 0.2294161468744278
          vf_loss: 0.011467918381094932
    num_agent_steps_sampled: 577000
    num_agent_steps_trained: 577000
    num_steps_sampled: 577000
    num_steps_trained: 577000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,577,14446.4,577000,-2.4581,-2.15,-2.99,245.81


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 578000
  custom_metrics: {}
  date: 2021-10-24_12-09-09
  done: false
  episode_len_mean: 245.8
  episode_media: {}
  episode_reward_max: -2.149999999999998
  episode_reward_mean: -2.4579999999999917
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 1978
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.474795980278483e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.435955724451277
          entropy_coeff: 0.009999999999999998
          kl: 0.0029458148766622117
          policy_loss: 0.04543743228746785
          total_loss: 0.05298306941986084
          vf_explained_var: 0.13853837549686432
          vf_loss: 0.011905193515121936
    num_agent_steps_sampled: 578000
    num_agent_steps_trained: 578000
    num_steps_sampled: 578000
    num_steps_trained: 578000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,578,14473.4,578000,-2.458,-2.15,-2.99,245.8




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 579000
  custom_metrics: {}
  date: 2021-10-24_12-09-55
  done: false
  episode_len_mean: 245.44
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.4543999999999913
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 1982
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.373979901392415e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6354667915238275
          entropy_coeff: 0.009999999999999998
          kl: 0.020221545073913758
          policy_loss: -0.002016909213529693
          total_loss: 0.005701409818397628
          vf_explained_var: 0.19814838469028473
          vf_loss: 0.014072984457015991
    num_agent_steps_sampled: 579000
    num_agent_steps_trained: 579000
    num_steps_sampled: 579000
    num_steps_trained: 57

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,579,14518.8,579000,-2.4544,-2.09,-2.99,245.44


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 580000
  custom_metrics: {}
  date: 2021-10-24_12-10-21
  done: false
  episode_len_mean: 245.85
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.4584999999999915
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 1986
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1060969852088628e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8680082824495103
          entropy_coeff: 0.009999999999999998
          kl: 0.025676223340059798
          policy_loss: -0.007304254670937856
          total_loss: -0.0007324756019645267
          vf_explained_var: 0.11800334602594376
          vf_loss: 0.01525186499994662
    num_agent_steps_sampled: 580000
    num_agent_steps_trained: 580000
    num_steps_sampled: 580000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,580,14544.8,580000,-2.4585,-2.09,-2.99,245.85


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 581000
  custom_metrics: {}
  date: 2021-10-24_12-10-48
  done: false
  episode_len_mean: 246.15
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.461499999999991
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 1990
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6591454778132942e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.4794205652342902
          entropy_coeff: 0.009999999999999998
          kl: 0.003867693637155551
          policy_loss: -0.033958011782831615
          total_loss: -0.02598045567671458
          vf_explained_var: 0.22780664265155792
          vf_loss: 0.012771761883050204
    num_agent_steps_sampled: 581000
    num_agent_steps_trained: 581000
    num_steps_sampled: 581000
    num_steps_trained: 58

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,581,14572,581000,-2.4615,-2.09,-2.99,246.15


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 582000
  custom_metrics: {}
  date: 2021-10-24_12-11-15
  done: false
  episode_len_mean: 245.18
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.451799999999992
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 5
  episodes_total: 1995
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.295727389066471e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5339906026919683
          entropy_coeff: 0.009999999999999998
          kl: 0.028590124405157902
          policy_loss: -0.017584571987390517
          total_loss: -0.007254841840929455
          vf_explained_var: 0.19832825660705566
          vf_loss: 0.01566963577643037
    num_agent_steps_sampled: 582000
    num_agent_steps_trained: 582000
    num_steps_sampled: 582000
    num_steps_trained: 582

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,582,14598.8,582000,-2.4518,-2.09,-2.99,245.18


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 583000
  custom_metrics: {}
  date: 2021-10-24_12-11-42
  done: false
  episode_len_mean: 244.36
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.443599999999992
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 1999
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2443591083599698e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.39046325782934826
          entropy_coeff: 0.009999999999999998
          kl: 0.003905647883482288
          policy_loss: 0.029347772316800223
          total_loss: 0.038024432957172394
          vf_explained_var: 0.17193497717380524
          vf_loss: 0.012581297051575449
    num_agent_steps_sampled: 583000
    num_agent_steps_trained: 583000
    num_steps_sampled: 583000
    num_steps_trained: 58

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,583,14625.6,583000,-2.4436,-2.09,-2.99,244.36


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 584000
  custom_metrics: {}
  date: 2021-10-24_12-12-09
  done: false
  episode_len_mean: 243.59
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.435899999999992
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 2003
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.221795541799849e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.4145759807692634
          entropy_coeff: 0.009999999999999998
          kl: 0.01064561889641994
          policy_loss: 0.049137947335839274
          total_loss: 0.05596739041308562
          vf_explained_var: 0.13269931077957153
          vf_loss: 0.010975199363504847
    num_agent_steps_sampled: 584000
    num_agent_steps_trained: 584000
    num_steps_sampled: 584000
    num_steps_trained: 584000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,584,14653.1,584000,-2.4359,-2.09,-2.99,243.59


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 585000
  custom_metrics: {}
  date: 2021-10-24_12-12-37
  done: false
  episode_len_mean: 243.1
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.430999999999992
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 2007
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.221795541799849e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.5075914690891902
          entropy_coeff: 0.009999999999999998
          kl: 0.002817897657348962
          policy_loss: 0.007773360030518638
          total_loss: 0.017610140558746126
          vf_explained_var: 0.06406576186418533
          vf_loss: 0.014912694527043236
    num_agent_steps_sampled: 585000
    num_agent_steps_trained: 585000
    num_steps_sampled: 585000
    num_steps_trained: 58500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,585,14680.6,585000,-2.431,-2.09,-2.99,243.1




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 586000
  custom_metrics: {}
  date: 2021-10-24_12-13-22
  done: false
  episode_len_mean: 242.12
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.4211999999999922
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 2011
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1108977708999245e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6544090078936683
          entropy_coeff: 0.009999999999999998
          kl: 0.0052526710313276544
          policy_loss: -0.03858882329530186
          total_loss: -0.030771852739983136
          vf_explained_var: 0.15760093927383423
          vf_loss: 0.01436106013134122
    num_agent_steps_sampled: 586000
    num_agent_steps_trained: 586000
    num_steps_sampled: 586000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,586,14725.7,586000,-2.4212,-2.09,-2.99,242.12


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 587000
  custom_metrics: {}
  date: 2021-10-24_12-13-47
  done: false
  episode_len_mean: 242.12
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.4211999999999922
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 2015
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1108977708999245e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7274345917834176
          entropy_coeff: 0.009999999999999998
          kl: 0.011137245298146543
          policy_loss: -0.022230663647254307
          total_loss: -0.014860584007369147
          vf_explained_var: 0.12628117203712463
          vf_loss: 0.014644428715109825
    num_agent_steps_sampled: 587000
    num_agent_steps_trained: 587000
    num_steps_sampled: 587000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,587,14751.5,587000,-2.4212,-2.09,-2.99,242.12


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 588000
  custom_metrics: {}
  date: 2021-10-24_12-14-14
  done: false
  episode_len_mean: 241.88
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.4187999999999925
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 5
  episodes_total: 2020
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1108977708999245e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.4808391359117296
          entropy_coeff: 0.009999999999999998
          kl: 0.011148158426731274
          policy_loss: -0.026873487399684058
          total_loss: -0.01650820407602522
          vf_explained_var: 0.29230797290802
          vf_loss: 0.015173672512173653
    num_agent_steps_sampled: 588000
    num_agent_steps_trained: 588000
    num_steps_sampled: 588000
    num_steps_trained: 5880

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,588,14778.5,588000,-2.4188,-2.09,-2.99,241.88


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 589000
  custom_metrics: {}
  date: 2021-10-24_12-14-41
  done: false
  episode_len_mean: 240.55
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.405499999999993
  episode_reward_min: -2.9199999999999817
  episodes_this_iter: 4
  episodes_total: 2024
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1108977708999245e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.6147915449407365
          entropy_coeff: 0.009999999999999998
          kl: 0.09692482436575713
          policy_loss: -0.0002727766417794757
          total_loss: 0.006746742419070668
          vf_explained_var: 0.3742962181568146
          vf_loss: 0.013167433181984557
    num_agent_steps_sampled: 589000
    num_agent_steps_trained: 589000
    num_steps_sampled: 589000
    num_steps_trained: 589

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,589,14805.4,589000,-2.4055,-2.09,-2.92,240.55


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 590000
  custom_metrics: {}
  date: 2021-10-24_12-15-04
  done: false
  episode_len_mean: 242.51
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.425099999999992
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 3
  episodes_total: 2027
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.666346656349889e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7969896899329292
          entropy_coeff: 0.009999999999999998
          kl: 0.008246259110973694
          policy_loss: 0.06567198269897037
          total_loss: 0.06807366613712576
          vf_explained_var: -0.0062284390442073345
          vf_loss: 0.010371579924443116
    num_agent_steps_sampled: 590000
    num_agent_steps_trained: 590000
    num_steps_sampled: 590000
    num_steps_trained: 590

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,590,14827.5,590000,-2.4251,-2.09,-3.73,242.51


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 591000
  custom_metrics: {}
  date: 2021-10-24_12-15-30
  done: false
  episode_len_mean: 242.67
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.4266999999999914
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 2031
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.666346656349889e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7334346466594273
          entropy_coeff: 0.009999999999999998
          kl: 0.017331918791820694
          policy_loss: 0.021758136401573817
          total_loss: 0.02753267354435391
          vf_explained_var: 0.18052837252616882
          vf_loss: 0.013108884574224551
    num_agent_steps_sampled: 591000
    num_agent_steps_trained: 591000
    num_steps_sampled: 591000
    num_steps_trained: 5910

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,591,14853.8,591000,-2.4267,-2.09,-3.73,242.67


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 592000
  custom_metrics: {}
  date: 2021-10-24_12-15-55
  done: false
  episode_len_mean: 243.55
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.435499999999992
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 4
  episodes_total: 2035
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.666346656349889e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.8088819762070973
          entropy_coeff: 0.009999999999999998
          kl: 0.02151621143995399
          policy_loss: 0.006058110131157769
          total_loss: 0.012688508133093516
          vf_explained_var: 0.20114588737487793
          vf_loss: 0.014719215790844627
    num_agent_steps_sampled: 592000
    num_agent_steps_trained: 592000
    num_steps_sampled: 592000
    num_steps_trained: 59200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,592,14878.6,592000,-2.4355,-2.09,-3.73,243.55


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 593000
  custom_metrics: {}
  date: 2021-10-24_12-16-19
  done: false
  episode_len_mean: 244.94
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.4493999999999914
  episode_reward_min: -3.7299999999999645
  episodes_this_iter: 3
  episodes_total: 2038
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.999519984524835e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.0873972747060987
          entropy_coeff: 0.009999999999999998
          kl: 0.027854351416620994
          policy_loss: -0.10608340998490652
          total_loss: -0.10269075781106948
          vf_explained_var: 0.26793497800827026
          vf_loss: 0.014266625646915701
    num_agent_steps_sampled: 593000
    num_agent_steps_trained: 593000
    num_steps_sampled: 593000
    num_steps_trained: 593

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,593,14903.1,593000,-2.4494,-2.09,-3.73,244.94




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 594000
  custom_metrics: {}
  date: 2021-10-24_12-16-58
  done: false
  episode_len_mean: 247.17
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.471699999999991
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 2042
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.049927997678725e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.1050734096103245
          entropy_coeff: 0.009999999999999998
          kl: 0.020135576572257858
          policy_loss: 0.013966862029499478
          total_loss: 0.01728388261463907
          vf_explained_var: -0.19213280081748962
          vf_loss: 0.014367751286934233
    num_agent_steps_sampled: 594000
    num_agent_steps_trained: 594000
    num_steps_sampled: 594000
    num_steps_trained: 59400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,594,14942.2,594000,-2.4717,-2.09,-3.98,247.17


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 595000
  custom_metrics: {}
  date: 2021-10-24_12-17-24
  done: false
  episode_len_mean: 247.7
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.476999999999991
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 3
  episodes_total: 2045
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5748919965180868e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.976018018854989
          entropy_coeff: 0.009999999999999998
          kl: 0.006590675061787168
          policy_loss: -0.11401448340879547
          total_loss: -0.11152775651878781
          vf_explained_var: 0.3369612693786621
          vf_loss: 0.01224690725406011
    num_agent_steps_sampled: 595000
    num_agent_steps_trained: 595000
    num_steps_sampled: 595000
    num_steps_trained: 595000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,595,14968.3,595000,-2.477,-2.09,-3.98,247.7


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 596000
  custom_metrics: {}
  date: 2021-10-24_12-17-50
  done: false
  episode_len_mean: 248.26
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.482599999999991
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 2049
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5748919965180868e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8913013577461243
          entropy_coeff: 0.009999999999999998
          kl: 0.006149834362446743
          policy_loss: -0.050387876563602024
          total_loss: -0.048295865125126305
          vf_explained_var: 0.3095908463001251
          vf_loss: 0.011005026841950085
    num_agent_steps_sampled: 596000
    num_agent_steps_trained: 596000
    num_steps_sampled: 596000
    num_steps_trained: 596

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,596,14993.5,596000,-2.4826,-2.09,-3.98,248.26


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 597000
  custom_metrics: {}
  date: 2021-10-24_12-18-15
  done: false
  episode_len_mean: 248.8
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.4879999999999907
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 2053
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5748919965180868e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9458019693692525
          entropy_coeff: 0.009999999999999998
          kl: 0.005170070201267802
          policy_loss: -0.021342499885294172
          total_loss: -0.018001502421167163
          vf_explained_var: 0.2572978734970093
          vf_loss: 0.01279901655183898
    num_agent_steps_sampled: 597000
    num_agent_steps_trained: 597000
    num_steps_sampled: 597000
    num_steps_trained: 5970

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,597,15018.8,597000,-2.488,-2.09,-3.98,248.8


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 598000
  custom_metrics: {}
  date: 2021-10-24_12-18-42
  done: false
  episode_len_mean: 249.34
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.493399999999991
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 2057
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5748919965180868e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8783662352297041
          entropy_coeff: 0.009999999999999998
          kl: 0.032404906299176
          policy_loss: -0.02538213311798043
          total_loss: -0.021828343719244005
          vf_explained_var: 0.26639971137046814
          vf_loss: 0.01233745204905669
    num_agent_steps_sampled: 598000
    num_agent_steps_trained: 598000
    num_steps_sampled: 598000
    num_steps_trained: 598000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,598,15045.3,598000,-2.4934,-2.09,-3.98,249.34


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 599000
  custom_metrics: {}
  date: 2021-10-24_12-19-04
  done: false
  episode_len_mean: 251.12
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.5111999999999903
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 2061
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3623379947771303e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0386796156565348
          entropy_coeff: 0.009999999999999998
          kl: 0.0076133524140508035
          policy_loss: 0.0012565154996183184
          total_loss: 0.003702343710594707
          vf_explained_var: 0.30372852087020874
          vf_loss: 0.012832623088939323
    num_agent_steps_sampled: 599000
    num_agent_steps_trained: 599000
    num_steps_sampled: 599000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,599,15067.8,599000,-2.5112,-2.09,-3.98,251.12


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 600000
  custom_metrics: {}
  date: 2021-10-24_12-19-25
  done: false
  episode_len_mean: 253.42
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.53419999999999
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 3
  episodes_total: 2064
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3623379947771303e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.1700080076853434
          entropy_coeff: 0.009999999999999998
          kl: 0.023676391427723806
          policy_loss: 0.05302559609214465
          total_loss: 0.05232046105795436
          vf_explained_var: 0.015708820894360542
          vf_loss: 0.010994945381147167
    num_agent_steps_sampled: 600000
    num_agent_steps_trained: 600000
    num_steps_sampled: 600000
    num_steps_trained: 600000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,600,15089.1,600000,-2.5342,-2.09,-3.98,253.42


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 601000
  custom_metrics: {}
  date: 2021-10-24_12-19-47
  done: false
  episode_len_mean: 255.95
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.5594999999999892
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 3
  episodes_total: 2067
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.543506992165697e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.162700852420595
          entropy_coeff: 0.009999999999999998
          kl: 0.024664668558733005
          policy_loss: 0.04884899664256308
          total_loss: 0.0486115259428819
          vf_explained_var: -0.28047946095466614
          vf_loss: 0.01138954082206409
    num_agent_steps_sampled: 601000
    num_agent_steps_trained: 601000
    num_steps_sampled: 601000
    num_steps_trained: 601000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,601,15110.6,601000,-2.5595,-2.09,-3.98,255.95




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 602000
  custom_metrics: {}
  date: 2021-10-24_12-20-29
  done: false
  episode_len_mean: 257.5
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.574999999999989
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 2071
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.315260488248543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2577932476997375
          entropy_coeff: 0.009999999999999998
          kl: 0.012469375950242225
          policy_loss: 0.02063959530658192
          total_loss: 0.022496802856524785
          vf_explained_var: 0.12146449834108353
          vf_loss: 0.014435141512917148
    num_agent_steps_sampled: 602000
    num_agent_steps_trained: 602000
    num_steps_sampled: 602000
    num_steps_trained: 602000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,602,15152.6,602000,-2.575,-2.09,-3.98,257.5


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 603000
  custom_metrics: {}
  date: 2021-10-24_12-20-53
  done: false
  episode_len_mean: 258.86
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.588599999999989
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 3
  episodes_total: 2074
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.315260488248543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.3943757613499959
          entropy_coeff: 0.009999999999999998
          kl: 0.014816432138289
          policy_loss: -0.015306584040323893
          total_loss: -0.018399564425150554
          vf_explained_var: 0.02991805598139763
          vf_loss: 0.010850779351312668
    num_agent_steps_sampled: 603000
    num_agent_steps_trained: 603000
    num_steps_sampled: 603000
    num_steps_trained: 603000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,603,15176.9,603000,-2.5886,-2.09,-3.98,258.86


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 604000
  custom_metrics: {}
  date: 2021-10-24_12-21-17
  done: false
  episode_len_mean: 259.96
  episode_media: {}
  episode_reward_max: -2.0899999999999994
  episode_reward_mean: -2.5995999999999877
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 2078
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.315260488248543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.3320763508478801
          entropy_coeff: 0.009999999999999998
          kl: 0.010510307446632419
          policy_loss: 0.02074172335366408
          total_loss: 0.022200654529862935
          vf_explained_var: 0.06831227242946625
          vf_loss: 0.014779693633317947
    num_agent_steps_sampled: 604000
    num_agent_steps_trained: 604000
    num_steps_sampled: 604000
    num_steps_trained: 60400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,604,15201.1,604000,-2.5996,-2.09,-3.98,259.96


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 605000
  custom_metrics: {}
  date: 2021-10-24_12-21-42
  done: false
  episode_len_mean: 261.7
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.616999999999988
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 2082
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.315260488248543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.3078791830274794
          entropy_coeff: 0.009999999999999998
          kl: 0.00669482740642593
          policy_loss: 0.005007090419530869
          total_loss: 0.006708661963542303
          vf_explained_var: 0.150435209274292
          vf_loss: 0.014780360087752343
    num_agent_steps_sampled: 605000
    num_agent_steps_trained: 605000
    num_steps_sampled: 605000
    num_steps_trained: 605000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,605,15225.2,605000,-2.617,-2.1,-3.98,261.7


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 606000
  custom_metrics: {}
  date: 2021-10-24_12-22-03
  done: false
  episode_len_mean: 263.44
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.634399999999988
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 3
  episodes_total: 2085
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.315260488248543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.250424975819058
          entropy_coeff: 0.009999999999999998
          kl: 0.010453644889710428
          policy_loss: 0.06243960890505049
          total_loss: 0.06200102037853665
          vf_explained_var: 0.051157597452402115
          vf_loss: 0.012065661843452188
    num_agent_steps_sampled: 606000
    num_agent_steps_trained: 606000
    num_steps_sampled: 606000
    num_steps_trained: 606000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,606,15247,606000,-2.6344,-2.1,-3.98,263.44


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 607000
  custom_metrics: {}
  date: 2021-10-24_12-22-27
  done: false
  episode_len_mean: 264.13
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6412999999999873
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 3
  episodes_total: 2088
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.315260488248543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2221005055639478
          entropy_coeff: 0.009999999999999998
          kl: 0.007443922148033898
          policy_loss: -0.10642538401815627
          total_loss: -0.10284493110246129
          vf_explained_var: 0.08027313649654388
          vf_loss: 0.015801458826495543
    num_agent_steps_sampled: 607000
    num_agent_steps_trained: 607000
    num_steps_sampled: 607000
    num_steps_trained: 60700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,607,15270.9,607000,-2.6413,-2.1,-3.98,264.13


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 608000
  custom_metrics: {}
  date: 2021-10-24_12-22-53
  done: false
  episode_len_mean: 265.24
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6523999999999863
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 2092
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.315260488248543e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.1908550063769023
          entropy_coeff: 0.009999999999999998
          kl: 0.02135262339701656
          policy_loss: -0.042011533967322774
          total_loss: -0.038491058722138406
          vf_explained_var: 0.16601549088954926
          vf_loss: 0.015429025267561276
    num_agent_steps_sampled: 608000
    num_agent_steps_trained: 608000
    num_steps_sampled: 608000
    num_steps_trained: 6080

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,608,15296.7,608000,-2.6524,-2.1,-3.98,265.24


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 609000
  custom_metrics: {}
  date: 2021-10-24_12-23-18
  done: false
  episode_len_mean: 266.3
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.662999999999987
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 2096
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.1107832656966314
          entropy_coeff: 0.009999999999999998
          kl: 0.008519858664151335
          policy_loss: 0.010103808633155293
          total_loss: 0.012594006251957682
          vf_explained_var: 0.20803555846214294
          vf_loss: 0.013598029926005338
    num_agent_steps_sampled: 609000
    num_agent_steps_trained: 609000
    num_steps_sampled: 609000
    num_steps_trained: 609000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,609,15321.2,609000,-2.663,-2.1,-3.98,266.3




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 610000
  custom_metrics: {}
  date: 2021-10-24_12-23-57
  done: false
  episode_len_mean: 267.74
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6773999999999862
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 2100
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.081985976960924
          entropy_coeff: 0.009999999999999998
          kl: 0.009315678127096778
          policy_loss: -0.00014061248964733548
          total_loss: 0.004899183660745621
          vf_explained_var: 0.11387602239847183
          vf_loss: 0.01585965331436859
    num_agent_steps_sampled: 610000
    num_agent_steps_trained: 610000
    num_steps_sampled: 610000
    num_steps_trained: 6100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,610,15360.8,610000,-2.6774,-2.1,-3.98,267.74


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 611000
  custom_metrics: {}
  date: 2021-10-24_12-24-22
  done: false
  episode_len_mean: 269.18
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.6917999999999864
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 3
  episodes_total: 2103
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2052092128329808
          entropy_coeff: 0.009999999999999998
          kl: 0.00650533737848718
          policy_loss: 0.03190535224146313
          total_loss: 0.030993776188956365
          vf_explained_var: -0.01983870193362236
          vf_loss: 0.011140514006062101
    num_agent_steps_sampled: 611000
    num_agent_steps_trained: 611000
    num_steps_sampled: 611000
    num_steps_trained: 611000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,611,15386,611000,-2.6918,-2.1,-3.98,269.18


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 612000
  custom_metrics: {}
  date: 2021-10-24_12-24-45
  done: false
  episode_len_mean: 271.42
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -2.7141999999999857
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 2107
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0867418825626374
          entropy_coeff: 0.009999999999999998
          kl: 0.008430559412366673
          policy_loss: 0.010051490449243122
          total_loss: 0.015138094789452022
          vf_explained_var: 0.03958673030138016
          vf_loss: 0.015954023133963345
    num_agent_steps_sampled: 612000
    num_agent_steps_trained: 612000
    num_steps_sampled: 612000
    num_steps_trained: 61200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,612,15408.4,612000,-2.7142,-2.1,-3.98,271.42


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 613000
  custom_metrics: {}
  date: 2021-10-24_12-25-10
  done: false
  episode_len_mean: 272.51
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.7250999999999856
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 3
  episodes_total: 2110
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.9981858902507358
          entropy_coeff: 0.009999999999999998
          kl: 0.011233419788723253
          policy_loss: -0.02997674619158109
          total_loss: -0.02877767003244824
          vf_explained_var: 0.13845546543598175
          vf_loss: 0.011180933595945438
    num_agent_steps_sampled: 613000
    num_agent_steps_trained: 613000
    num_steps_sampled: 613000
    num_steps_trained: 61300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,613,15433.5,613000,-2.7251,-2.16,-3.98,272.51


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 614000
  custom_metrics: {}
  date: 2021-10-24_12-25-35
  done: false
  episode_len_mean: 272.88
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.7287999999999863
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 2114
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0082874609364403
          entropy_coeff: 0.009999999999999998
          kl: 0.005268330767110759
          policy_loss: 0.013308463825119866
          total_loss: 0.018110796643628014
          vf_explained_var: 0.12658607959747314
          vf_loss: 0.014885205837587515
    num_agent_steps_sampled: 614000
    num_agent_steps_trained: 614000
    num_steps_sampled: 614000
    num_steps_trained: 61400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,614,15458,614000,-2.7288,-2.16,-3.98,272.88


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 615000
  custom_metrics: {}
  date: 2021-10-24_12-25-59
  done: false
  episode_len_mean: 273.96
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.739599999999985
  episode_reward_min: -3.979999999999959
  episodes_this_iter: 4
  episodes_total: 2118
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.972890732372816e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0598688589202032
          entropy_coeff: 0.009999999999999998
          kl: 0.03643278399268814
          policy_loss: 0.03071789683567153
          total_loss: 0.03455770305461354
          vf_explained_var: 0.18257728219032288
          vf_loss: 0.014438495816042026
    num_agent_steps_sampled: 615000
    num_agent_steps_trained: 615000
    num_steps_sampled: 615000
    num_steps_trained: 615000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,615,15482.3,615000,-2.7396,-2.16,-3.98,273.96


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 616000
  custom_metrics: {}
  date: 2021-10-24_12-26-18
  done: false
  episode_len_mean: 277.64
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.7763999999999847
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 3
  episodes_total: 2121
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1959336098559223e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9160794284608629
          entropy_coeff: 0.009999999999999998
          kl: 0.01199255651436566
          policy_loss: 0.03981558221081893
          total_loss: 0.0424196705636051
          vf_explained_var: 0.17983953654766083
          vf_loss: 0.011764882735830422
    num_agent_steps_sampled: 616000
    num_agent_steps_trained: 616000
    num_steps_sampled: 616000
    num_steps_trained: 616000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,616,15501.5,616000,-2.7764,-2.16,-4.01,277.64


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 617000
  custom_metrics: {}
  date: 2021-10-24_12-26-39
  done: false
  episode_len_mean: 280.59
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.805899999999984
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 3
  episodes_total: 2124
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1959336098559223e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9856385787328085
          entropy_coeff: 0.009999999999999998
          kl: 0.013462601659349913
          policy_loss: 0.009396098885271284
          total_loss: 0.011380418886741002
          vf_explained_var: 0.24659638106822968
          vf_loss: 0.011840708242056684
    num_agent_steps_sampled: 617000
    num_agent_steps_trained: 617000
    num_steps_sampled: 617000
    num_steps_trained: 61700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,617,15521.9,617000,-2.8059,-2.16,-4.01,280.59


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 618000
  custom_metrics: {}
  date: 2021-10-24_12-26-58
  done: false
  episode_len_mean: 280.86
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.8085999999999838
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 2
  episodes_total: 2126
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1959336098559223e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0285078697734409
          entropy_coeff: 0.009999999999999998
          kl: 0.011309279656664097
          policy_loss: -0.0914161612590154
          total_loss: -0.08993727829721239
          vf_explained_var: 0.004627023823559284
          vf_loss: 0.011763963524976538
    num_agent_steps_sampled: 618000
    num_agent_steps_trained: 618000
    num_steps_sampled: 618000
    num_steps_trained: 6180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,618,15541.2,618000,-2.8086,-2.16,-4.01,280.86




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 619000
  custom_metrics: {}
  date: 2021-10-24_12-27-36
  done: false
  episode_len_mean: 283.79
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.837899999999983
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2130
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1959336098559223e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.1491263773706224
          entropy_coeff: 0.009999999999999998
          kl: 0.02390836518476319
          policy_loss: -0.002963510693775283
          total_loss: 0.0020162138673994277
          vf_explained_var: 0.12869036197662354
          vf_loss: 0.016470985487103462
    num_agent_steps_sampled: 619000
    num_agent_steps_trained: 619000
    num_steps_sampled: 619000
    num_steps_trained: 6190

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,619,15579.1,619000,-2.8379,-2.16,-4.01,283.79


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 620000
  custom_metrics: {}
  date: 2021-10-24_12-28-01
  done: false
  episode_len_mean: 284.71
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.847099999999983
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 3
  episodes_total: 2133
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7939004147838834e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.1267242550849914
          entropy_coeff: 0.009999999999999998
          kl: 0.0190750429888668
          policy_loss: 0.04881604106889831
          total_loss: 0.04886771995160315
          vf_explained_var: 0.0921858474612236
          vf_loss: 0.011318921820364064
    num_agent_steps_sampled: 620000
    num_agent_steps_trained: 620000
    num_steps_sampled: 620000
    num_steps_trained: 620000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,620,15603.9,620000,-2.8471,-2.16,-4.01,284.71


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 621000
  custom_metrics: {}
  date: 2021-10-24_12-28-25
  done: false
  episode_len_mean: 284.79
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.8478999999999832
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2137
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7939004147838834e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.1360572947396173
          entropy_coeff: 0.009999999999999998
          kl: 0.006979998270812062
          policy_loss: 0.01476885403196017
          total_loss: 0.018873012479808597
          vf_explained_var: 0.1427784264087677
          vf_loss: 0.015464732454468806
    num_agent_steps_sampled: 621000
    num_agent_steps_trained: 621000
    num_steps_sampled: 621000
    num_steps_trained: 621000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,621,15628,621000,-2.8479,-2.16,-4.01,284.79


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 622000
  custom_metrics: {}
  date: 2021-10-24_12-28-50
  done: false
  episode_len_mean: 285.08
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.8507999999999822
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 3
  episodes_total: 2140
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7939004147838834e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.062770081890954
          entropy_coeff: 0.009999999999999998
          kl: 0.020203726739024732
          policy_loss: -0.10674496011601554
          total_loss: -0.10266801921857728
          vf_explained_var: 0.2042941004037857
          vf_loss: 0.01470463913347986
    num_agent_steps_sampled: 622000
    num_agent_steps_trained: 622000
    num_steps_sampled: 622000
    num_steps_trained: 622000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,622,15653.7,622000,-2.8508,-2.42,-4.01,285.08


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 623000
  custom_metrics: {}
  date: 2021-10-24_12-29-16
  done: false
  episode_len_mean: 282.85
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.8284999999999836
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2144
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.690850622175825e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0250228881835937
          entropy_coeff: 0.009999999999999998
          kl: 0.019925035241533665
          policy_loss: -0.03358717846373717
          total_loss: -0.02889613943795363
          vf_explained_var: 0.2050657719373703
          vf_loss: 0.014941270659781165
    num_agent_steps_sampled: 623000
    num_agent_steps_trained: 623000
    num_steps_sampled: 623000
    num_steps_trained: 62300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,623,15679.2,623000,-2.8285,-2.42,-4.01,282.85


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 624000
  custom_metrics: {}
  date: 2021-10-24_12-29-41
  done: false
  episode_len_mean: 282.98
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.8297999999999837
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2148
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.690850622175825e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9300015409787495
          entropy_coeff: 0.009999999999999998
          kl: 0.008041499953964306
          policy_loss: -0.023383255137337577
          total_loss: -0.01915106905831231
          vf_explained_var: 0.23357799649238586
          vf_loss: 0.013532200341837273
    num_agent_steps_sampled: 624000
    num_agent_steps_trained: 624000
    num_steps_sampled: 624000
    num_steps_trained: 624

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,624,15704.6,624000,-2.8298,-2.42,-4.01,282.98


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 625000
  custom_metrics: {}
  date: 2021-10-24_12-30-07
  done: false
  episode_len_mean: 282.95
  episode_media: {}
  episode_reward_max: -2.4199999999999924
  episode_reward_mean: -2.8294999999999835
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2152
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.690850622175825e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0422303716341654
          entropy_coeff: 0.009999999999999998
          kl: 0.014835828850882157
          policy_loss: 0.009728989377617837
          total_loss: 0.01195501432650619
          vf_explained_var: 0.36112475395202637
          vf_loss: 0.012648327431331078
    num_agent_steps_sampled: 625000
    num_agent_steps_trained: 625000
    num_steps_sampled: 625000
    num_steps_trained: 62500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,625,15730.2,625000,-2.8295,-2.42,-4.01,282.95


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 626000
  custom_metrics: {}
  date: 2021-10-24_12-30-32
  done: false
  episode_len_mean: 283.48
  episode_media: {}
  episode_reward_max: -2.439999999999992
  episode_reward_mean: -2.8347999999999827
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2156
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.690850622175825e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0375155601236554
          entropy_coeff: 0.009999999999999998
          kl: 0.0064557195763906665
          policy_loss: -0.001992031517955992
          total_loss: -0.0003675965799225701
          vf_explained_var: 0.4600020945072174
          vf_loss: 0.011999591605530845
    num_agent_steps_sampled: 626000
    num_agent_steps_trained: 626000
    num_steps_sampled: 626000
    num_steps_trained: 62

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,626,15754.9,626000,-2.8348,-2.44,-4.01,283.48




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 627000
  custom_metrics: {}
  date: 2021-10-24_12-31-13
  done: false
  episode_len_mean: 283.45
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -2.8344999999999834
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2160
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.690850622175825e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.028136150042216
          entropy_coeff: 0.009999999999999998
          kl: 0.007741720506999646
          policy_loss: 0.021825455791420405
          total_loss: 0.023237848447428808
          vf_explained_var: 0.4859423339366913
          vf_loss: 0.011693753643582265
    num_agent_steps_sampled: 627000
    num_agent_steps_trained: 627000
    num_steps_sampled: 627000
    num_steps_trained: 627000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,627,15796,627000,-2.8345,-2.45,-4.01,283.45


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 628000
  custom_metrics: {}
  date: 2021-10-24_12-31-40
  done: false
  episode_len_mean: 281.52
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -2.8151999999999835
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 3
  episodes_total: 2163
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.690850622175825e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9619296928246815
          entropy_coeff: 0.009999999999999998
          kl: 0.008508461959485677
          policy_loss: -0.10266881883144378
          total_loss: -0.10072447376118766
          vf_explained_var: 0.4339890480041504
          vf_loss: 0.011563638339026107
    num_agent_steps_sampled: 628000
    num_agent_steps_trained: 628000
    num_steps_sampled: 628000
    num_steps_trained: 62800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,628,15822.9,628000,-2.8152,-2.45,-4.01,281.52


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 629000
  custom_metrics: {}
  date: 2021-10-24_12-32-05
  done: false
  episode_len_mean: 279.49
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -2.7948999999999837
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2167
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.690850622175825e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9854300982422299
          entropy_coeff: 0.009999999999999998
          kl: 0.03070434079274528
          policy_loss: -0.013543696370389727
          total_loss: -0.0127606939110491
          vf_explained_var: 0.5293312668800354
          vf_loss: 0.010637304300649299
    num_agent_steps_sampled: 629000
    num_agent_steps_trained: 629000
    num_steps_sampled: 629000
    num_steps_trained: 629000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,629,15848.1,629000,-2.7949,-2.45,-4.01,279.49


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 630000
  custom_metrics: {}
  date: 2021-10-24_12-32-29
  done: false
  episode_len_mean: 279.47
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -2.794699999999984
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2171
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.036275933263736e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8303634021017287
          entropy_coeff: 0.009999999999999998
          kl: 0.011769984595439237
          policy_loss: 0.012271019361085362
          total_loss: 0.015900906547904013
          vf_explained_var: 0.4616582691669464
          vf_loss: 0.011933523996008766
    num_agent_steps_sampled: 630000
    num_agent_steps_trained: 630000
    num_steps_sampled: 630000
    num_steps_trained: 630000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,630,15871.9,630000,-2.7947,-2.45,-4.01,279.47


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 631000
  custom_metrics: {}
  date: 2021-10-24_12-32-52
  done: false
  episode_len_mean: 279.86
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -2.7985999999999835
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 3
  episodes_total: 2174
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.036275933263736e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8565540836917029
          entropy_coeff: 0.009999999999999998
          kl: 0.014912935350816031
          policy_loss: 0.028322664234373303
          total_loss: 0.02838024331463708
          vf_explained_var: 0.4364531934261322
          vf_loss: 0.008623119816830796
    num_agent_steps_sampled: 631000
    num_agent_steps_trained: 631000
    num_steps_sampled: 631000
    num_steps_trained: 631000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,631,15894.9,631000,-2.7986,-2.45,-4.01,279.86


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 632000
  custom_metrics: {}
  date: 2021-10-24_12-33-15
  done: false
  episode_len_mean: 280.69
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -2.8068999999999833
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2178
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.036275933263736e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9556933422883351
          entropy_coeff: 0.009999999999999998
          kl: 0.023139190406923957
          policy_loss: 0.003302439757519298
          total_loss: 0.006770220398902893
          vf_explained_var: 0.38852983713150024
          vf_loss: 0.013024716679420735
    num_agent_steps_sampled: 632000
    num_agent_steps_trained: 632000
    num_steps_sampled: 632000
    num_steps_trained: 6320

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,632,15918,632000,-2.8069,-2.45,-4.01,280.69


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 633000
  custom_metrics: {}
  date: 2021-10-24_12-33-37
  done: false
  episode_len_mean: 281.22
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -2.8121999999999834
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 3
  episodes_total: 2181
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.054413899895605e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0017505486806233
          entropy_coeff: 0.009999999999999998
          kl: 0.006773767975716933
          policy_loss: 0.03440891835424635
          total_loss: 0.03476501794324981
          vf_explained_var: 0.4284026324748993
          vf_loss: 0.010373604844789951
    num_agent_steps_sampled: 633000
    num_agent_steps_trained: 633000
    num_steps_sampled: 633000
    num_steps_trained: 633000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,633,15940.3,633000,-2.8122,-2.45,-4.01,281.22


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 634000
  custom_metrics: {}
  date: 2021-10-24_12-34-00
  done: false
  episode_len_mean: 281.6
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -2.815999999999983
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 3
  episodes_total: 2184
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.054413899895605e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9933000266551971
          entropy_coeff: 0.009999999999999998
          kl: 0.007761599414736983
          policy_loss: -0.09140748795535829
          total_loss: -0.08534796668423547
          vf_explained_var: 0.1471419483423233
          vf_loss: 0.01599252089444134
    num_agent_steps_sampled: 634000
    num_agent_steps_trained: 634000
    num_steps_sampled: 634000
    num_steps_trained: 634000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,634,15963.4,634000,-2.816,-2.45,-4.01,281.6


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 635000
  custom_metrics: {}
  date: 2021-10-24_12-34-24
  done: false
  episode_len_mean: 281.44
  episode_media: {}
  episode_reward_max: -2.4499999999999917
  episode_reward_mean: -2.8143999999999845
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2188
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.054413899895605e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.032263493537903
          entropy_coeff: 0.009999999999999998
          kl: 0.013374609260145812
          policy_loss: 0.01796160837014516
          total_loss: 0.0209532945520348
          vf_explained_var: 0.3696841299533844
          vf_loss: 0.01331432152332531
    num_agent_steps_sampled: 635000
    num_agent_steps_trained: 635000
    num_steps_sampled: 635000
    num_steps_trained: 635000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,635,15986.8,635000,-2.8144,-2.45,-4.01,281.44




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 636000
  custom_metrics: {}
  date: 2021-10-24_12-35-06
  done: false
  episode_len_mean: 281.66
  episode_media: {}
  episode_reward_max: -2.349999999999994
  episode_reward_mean: -2.816599999999984
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2192
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.054413899895605e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8972231401337518
          entropy_coeff: 0.009999999999999998
          kl: 0.00857596096822153
          policy_loss: 0.006110280421045091
          total_loss: 0.010469178441498014
          vf_explained_var: 0.35501331090927124
          vf_loss: 0.013331128294683164
    num_agent_steps_sampled: 636000
    num_agent_steps_trained: 636000
    num_steps_sampled: 636000
    num_steps_trained: 636000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,636,16028.9,636000,-2.8166,-2.35,-4.01,281.66


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 637000
  custom_metrics: {}
  date: 2021-10-24_12-35-30
  done: false
  episode_len_mean: 282.02
  episode_media: {}
  episode_reward_max: -2.349999999999994
  episode_reward_mean: -2.820199999999984
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 3
  episodes_total: 2195
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.054413899895605e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9162838359673818
          entropy_coeff: 0.009999999999999998
          kl: 0.038524768429277226
          policy_loss: 0.05064617238110966
          total_loss: 0.04974636435508728
          vf_explained_var: 0.28162682056427
          vf_loss: 0.008263030011827746
    num_agent_steps_sampled: 637000
    num_agent_steps_trained: 637000
    num_steps_sampled: 637000
    num_steps_trained: 637000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,637,16053.3,637000,-2.8202,-2.35,-4.01,282.02


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 638000
  custom_metrics: {}
  date: 2021-10-24_12-35-56
  done: false
  episode_len_mean: 281.31
  episode_media: {}
  episode_reward_max: -2.349999999999994
  episode_reward_mean: -2.8130999999999835
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2199
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.081620849843405e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0476633853382535
          entropy_coeff: 0.009999999999999998
          kl: 0.015460063274895772
          policy_loss: 0.009605930373072624
          total_loss: 0.01181054049068027
          vf_explained_var: 0.36595606803894043
          vf_loss: 0.012681244303368859
    num_agent_steps_sampled: 638000
    num_agent_steps_trained: 638000
    num_steps_sampled: 638000
    num_steps_trained: 638000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,638,16078.5,638000,-2.8131,-2.35,-4.01,281.31


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 639000
  custom_metrics: {}
  date: 2021-10-24_12-36-19
  done: false
  episode_len_mean: 281.12
  episode_media: {}
  episode_reward_max: -2.349999999999994
  episode_reward_mean: -2.8111999999999835
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2203
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.081620849843405e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9952356490823958
          entropy_coeff: 0.009999999999999998
          kl: 0.007548185153213538
          policy_loss: 0.01708909629119767
          total_loss: 0.019893782999780445
          vf_explained_var: 0.39804166555404663
          vf_loss: 0.012757043944050868
    num_agent_steps_sampled: 639000
    num_agent_steps_trained: 639000
    num_steps_sampled: 639000
    num_steps_trained: 639000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,639,16102.4,639000,-2.8112,-2.35,-4.01,281.12


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 640000
  custom_metrics: {}
  date: 2021-10-24_12-36-45
  done: false
  episode_len_mean: 280.53
  episode_media: {}
  episode_reward_max: -2.349999999999994
  episode_reward_mean: -2.805299999999984
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 3
  episodes_total: 2206
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.081620849843405e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.1073984702428181
          entropy_coeff: 0.009999999999999998
          kl: 0.007241251209229264
          policy_loss: -0.09200909468862746
          total_loss: -0.09008679704533683
          vf_explained_var: 0.36931291222572327
          vf_loss: 0.012996282604419523
    num_agent_steps_sampled: 640000
    num_agent_steps_trained: 640000
    num_steps_sampled: 640000
    num_steps_trained: 640000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,640,16127.5,640000,-2.8053,-2.35,-4.01,280.53


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 641000
  custom_metrics: {}
  date: 2021-10-24_12-37-09
  done: false
  episode_len_mean: 280.53
  episode_media: {}
  episode_reward_max: -2.349999999999994
  episode_reward_mean: -2.8052999999999844
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2210
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.081620849843405e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0664254440201653
          entropy_coeff: 0.009999999999999998
          kl: 0.014389986502525194
          policy_loss: 0.0016943875286314222
          total_loss: 0.005026529563797844
          vf_explained_var: 0.21238218247890472
          vf_loss: 0.013996399980452325
    num_agent_steps_sampled: 641000
    num_agent_steps_trained: 641000
    num_steps_sampled: 641000
    num_steps_trained: 6410

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,641,16151.6,641000,-2.8053,-2.35,-4.01,280.53


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 642000
  custom_metrics: {}
  date: 2021-10-24_12-37-33
  done: false
  episode_len_mean: 281.03
  episode_media: {}
  episode_reward_max: -2.349999999999994
  episode_reward_mean: -2.8102999999999843
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 4
  episodes_total: 2214
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.081620849843405e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.2214301890797086
          entropy_coeff: 0.009999999999999998
          kl: 0.028837797712470307
          policy_loss: -0.006596516817808151
          total_loss: -0.006221826126178106
          vf_explained_var: 0.42518025636672974
          vf_loss: 0.012588993594464328
    num_agent_steps_sampled: 642000
    num_agent_steps_trained: 642000
    num_steps_sampled: 642000
    num_steps_trained: 642

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,642,16175.5,642000,-2.8103,-2.35,-4.01,281.03


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 643000
  custom_metrics: {}
  date: 2021-10-24_12-37-58
  done: false
  episode_len_mean: 281.51
  episode_media: {}
  episode_reward_max: -2.349999999999994
  episode_reward_mean: -2.815099999999984
  episode_reward_min: -4.009999999999959
  episodes_this_iter: 3
  episodes_total: 2217
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3622431274765115e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9579644481341044
          entropy_coeff: 0.009999999999999998
          kl: 0.00816995870060361
          policy_loss: -0.06218006130721834
          total_loss: -0.061547075543138714
          vf_explained_var: 0.41414886713027954
          vf_loss: 0.010212631435650918
    num_agent_steps_sampled: 643000
    num_agent_steps_trained: 643000
    num_steps_sampled: 643000
    num_steps_trained: 64300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,643,16200.5,643000,-2.8151,-2.35,-4.01,281.51




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 644000
  custom_metrics: {}
  date: 2021-10-24_12-38-41
  done: false
  episode_len_mean: 278.13
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.781299999999984
  episode_reward_min: -3.93999999999996
  episodes_this_iter: 4
  episodes_total: 2221
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3622431274765115e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9685194777117835
          entropy_coeff: 0.009999999999999998
          kl: 0.003655920860931457
          policy_loss: -0.06752194753951496
          total_loss: -0.06560951620340347
          vf_explained_var: 0.4280330538749695
          vf_loss: 0.01159762976070245
    num_agent_steps_sampled: 644000
    num_agent_steps_trained: 644000
    num_steps_sampled: 644000
    num_steps_trained: 644000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,644,16243.6,644000,-2.7813,-2.26,-3.94,278.13


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 645000
  custom_metrics: {}
  date: 2021-10-24_12-39-06
  done: false
  episode_len_mean: 274.64
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7463999999999853
  episode_reward_min: -3.8299999999999623
  episodes_this_iter: 4
  episodes_total: 2225
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.811215637382557e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9606723142994775
          entropy_coeff: 0.009999999999999998
          kl: 0.009629574849261581
          policy_loss: 0.013559089663128058
          total_loss: 0.01632769935660892
          vf_explained_var: 0.35979360342025757
          vf_loss: 0.012375330045405362
    num_agent_steps_sampled: 645000
    num_agent_steps_trained: 645000
    num_steps_sampled: 645000
    num_steps_trained: 64500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,645,16268.8,645000,-2.7464,-2.26,-3.83,274.64


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 646000
  custom_metrics: {}
  date: 2021-10-24_12-39-31
  done: false
  episode_len_mean: 272.28
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7227999999999857
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 4
  episodes_total: 2229
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.811215637382557e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.954953075117535
          entropy_coeff: 0.009999999999999998
          kl: 0.009701593846356991
          policy_loss: -0.004271667119529512
          total_loss: -0.0017533199654685126
          vf_explained_var: 0.39432293176651
          vf_loss: 0.012067878887885146
    num_agent_steps_sampled: 646000
    num_agent_steps_trained: 646000
    num_steps_sampled: 646000
    num_steps_trained: 64600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,646,16293.8,646000,-2.7228,-2.26,-3.36,272.28


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 647000
  custom_metrics: {}
  date: 2021-10-24_12-39-56
  done: false
  episode_len_mean: 271.2
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7119999999999864
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 4
  episodes_total: 2233
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.811215637382557e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8761673914061652
          entropy_coeff: 0.009999999999999998
          kl: 0.006582295008293215
          policy_loss: 0.005533182289865282
          total_loss: 0.009662089662419425
          vf_explained_var: 0.34723788499832153
          vf_loss: 0.012890580813917848
    num_agent_steps_sampled: 647000
    num_agent_steps_trained: 647000
    num_steps_sampled: 647000
    num_steps_trained: 64700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,647,16318.6,647000,-2.712,-2.26,-3.36,271.2


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 648000
  custom_metrics: {}
  date: 2021-10-24_12-40-22
  done: false
  episode_len_mean: 270.63
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7062999999999864
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 3
  episodes_total: 2236
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.811215637382557e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9167587843206194
          entropy_coeff: 0.009999999999999998
          kl: 0.012303884643828885
          policy_loss: -0.09995756447315216
          total_loss: -0.09751396460665597
          vf_explained_var: 0.4477604031562805
          vf_loss: 0.011611183836228318
    num_agent_steps_sampled: 648000
    num_agent_steps_trained: 648000
    num_steps_sampled: 648000
    num_steps_trained: 64800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,648,16344.6,648000,-2.7063,-2.26,-3.36,270.63


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 649000
  custom_metrics: {}
  date: 2021-10-24_12-40-47
  done: false
  episode_len_mean: 270.36
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7035999999999856
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 4
  episodes_total: 2240
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.811215637382557e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.1778876854313745
          entropy_coeff: 0.009999999999999998
          kl: 0.13608666742816966
          policy_loss: -0.0030441227886411877
          total_loss: -0.004459986339012781
          vf_explained_var: 0.5281115174293518
          vf_loss: 0.01036301297135651
    num_agent_steps_sampled: 649000
    num_agent_steps_trained: 649000
    num_steps_sampled: 649000
    num_steps_trained: 6490

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,649,16369.5,649000,-2.7036,-2.26,-3.36,270.36


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 650000
  custom_metrics: {}
  date: 2021-10-24_12-41-09
  done: false
  episode_len_mean: 271.7
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.716999999999985
  episode_reward_min: -3.3599999999999723
  episodes_this_iter: 4
  episodes_total: 2244
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0216823456073836e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9512102584044139
          entropy_coeff: 0.009999999999999998
          kl: 0.02297832407394216
          policy_loss: 0.01901499844259686
          total_loss: 0.023511399411492877
          vf_explained_var: 0.23997873067855835
          vf_loss: 0.014008501999908024
    num_agent_steps_sampled: 650000
    num_agent_steps_trained: 650000
    num_steps_sampled: 650000
    num_steps_trained: 650000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,650,16392,650000,-2.717,-2.26,-3.36,271.7


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 651000
  custom_metrics: {}
  date: 2021-10-24_12-41-30
  done: false
  episode_len_mean: 273.87
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7386999999999855
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 3
  episodes_total: 2247
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5325235184110753e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0985966245333354
          entropy_coeff: 0.009999999999999998
          kl: 0.0343436978574978
          policy_loss: 0.08088838557402293
          total_loss: 0.07991874338024192
          vf_explained_var: 0.1803930550813675
          vf_loss: 0.010016323432015877
    num_agent_steps_sampled: 651000
    num_agent_steps_trained: 651000
    num_steps_sampled: 651000
    num_steps_trained: 651000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,651,16413,651000,-2.7387,-2.26,-3.4,273.87




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 652000
  custom_metrics: {}
  date: 2021-10-24_12-42-09
  done: false
  episode_len_mean: 275.19
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.751899999999986
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 3
  episodes_total: 2250
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2987852776166124e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9663526084687974
          entropy_coeff: 0.009999999999999998
          kl: 0.0065616802463419315
          policy_loss: 0.04898728024628427
          total_loss: 0.04975039677487479
          vf_explained_var: 0.018184663727879524
          vf_loss: 0.010426641545361943
    num_agent_steps_sampled: 652000
    num_agent_steps_trained: 652000
    num_steps_sampled: 652000
    num_steps_trained: 6520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,652,16451.7,652000,-2.7519,-2.26,-3.4,275.19


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 653000
  custom_metrics: {}
  date: 2021-10-24_12-42-31
  done: false
  episode_len_mean: 276.87
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.768699999999985
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 3
  episodes_total: 2253
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2987852776166124e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.058207282092836
          entropy_coeff: 0.009999999999999998
          kl: 0.01806377220075348
          policy_loss: 0.01698566691743003
          total_loss: 0.017140176561143663
          vf_explained_var: 0.26520973443984985
          vf_loss: 0.010736581335206413
    num_agent_steps_sampled: 653000
    num_agent_steps_trained: 653000
    num_steps_sampled: 653000
    num_steps_trained: 653000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,653,16473.2,653000,-2.7687,-2.26,-3.4,276.87


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 654000
  custom_metrics: {}
  date: 2021-10-24_12-42-53
  done: false
  episode_len_mean: 278.2
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.781999999999984
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 3
  episodes_total: 2256
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2987852776166124e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.16801930003696
          entropy_coeff: 0.009999999999999998
          kl: 0.01926603297830872
          policy_loss: -0.09495002726713817
          total_loss: -0.0917413239263826
          vf_explained_var: 0.2323310822248459
          vf_loss: 0.014888896306769716
    num_agent_steps_sampled: 654000
    num_agent_steps_trained: 654000
    num_steps_sampled: 654000
    num_steps_trained: 654000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,654,16495.5,654000,-2.782,-2.26,-3.41,278.2


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 655000
  custom_metrics: {}
  date: 2021-10-24_12-43-16
  done: false
  episode_len_mean: 279.1
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7909999999999844
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 2260
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2987852776166124e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.060694999827279
          entropy_coeff: 0.009999999999999998
          kl: 0.02177798407836396
          policy_loss: 0.019381410462988746
          total_loss: 0.024007791529099147
          vf_explained_var: 0.10138574242591858
          vf_loss: 0.015233330676952999
    num_agent_steps_sampled: 655000
    num_agent_steps_trained: 655000
    num_steps_sampled: 655000
    num_steps_trained: 655000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,655,16518.6,655000,-2.791,-2.26,-3.41,279.1


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 656000
  custom_metrics: {}
  date: 2021-10-24_12-43-41
  done: false
  episode_len_mean: 279.58
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.795799999999984
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 2264
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4481779164249194e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1160029040442572
          entropy_coeff: 0.009999999999999998
          kl: 0.01524342156125079
          policy_loss: -0.0026253409683704375
          total_loss: 0.0008298364778359731
          vf_explained_var: 0.17300979793071747
          vf_loss: 0.014615205033785767
    num_agent_steps_sampled: 656000
    num_agent_steps_trained: 656000
    num_steps_sampled: 656000
    num_steps_trained: 65

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,656,16543.5,656000,-2.7958,-2.26,-3.41,279.58


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 657000
  custom_metrics: {}
  date: 2021-10-24_12-44-05
  done: false
  episode_len_mean: 279.83
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.798299999999984
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 3
  episodes_total: 2267
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4481779164249194e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0325592597325643
          entropy_coeff: 0.009999999999999998
          kl: 0.005342935678040759
          policy_loss: -0.0466081403195858
          total_loss: -0.04703481838934952
          vf_explained_var: 0.24793872237205505
          vf_loss: 0.009898914945208365
    num_agent_steps_sampled: 657000
    num_agent_steps_trained: 657000
    num_steps_sampled: 657000
    num_steps_trained: 65700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,657,16567.9,657000,-2.7983,-2.26,-3.41,279.83


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 658000
  custom_metrics: {}
  date: 2021-10-24_12-44-27
  done: false
  episode_len_mean: 280.99
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.8098999999999843
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 3
  episodes_total: 2270
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4481779164249194e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9281504028373294
          entropy_coeff: 0.009999999999999998
          kl: 0.04532824479537325
          policy_loss: -0.10082471784618166
          total_loss: -0.09508182464374436
          vf_explained_var: 0.16998352110385895
          vf_loss: 0.015024395080076323
    num_agent_steps_sampled: 658000
    num_agent_steps_trained: 658000
    num_steps_sampled: 658000
    num_steps_trained: 6580

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,658,16589.9,658000,-2.8099,-2.26,-3.41,280.99


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 659000
  custom_metrics: {}
  date: 2021-10-24_12-44-52
  done: false
  episode_len_mean: 280.22
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.8021999999999845
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 2274
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.1722668746373794e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1603433854050107
          entropy_coeff: 0.009999999999999998
          kl: 0.015710744451283176
          policy_loss: -0.02195229422714975
          total_loss: -0.018493029806349013
          vf_explained_var: 0.1937660574913025
          vf_loss: 0.01506269940485557
    num_agent_steps_sampled: 659000
    num_agent_steps_trained: 659000
    num_steps_sampled: 659000
    num_steps_trained: 6590

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,659,16614.6,659000,-2.8022,-2.26,-3.41,280.22


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 660000
  custom_metrics: {}
  date: 2021-10-24_12-45-16
  done: false
  episode_len_mean: 279.89
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7988999999999846
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 2278
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.1722668746373794e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4080821010801527
          entropy_coeff: 0.009999999999999998
          kl: 0.011265423918488043
          policy_loss: 0.021253524555100337
          total_loss: 0.022052632768948872
          vf_explained_var: 0.25062355399131775
          vf_loss: 0.014879928808659315
    num_agent_steps_sampled: 660000
    num_agent_steps_trained: 660000
    num_steps_sampled: 660000
    num_steps_trained: 660

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,660,16638.1,660000,-2.7989,-2.26,-3.41,279.89




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 661000
  custom_metrics: {}
  date: 2021-10-24_12-45-56
  done: false
  episode_len_mean: 279.35
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7934999999999843
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 3
  episodes_total: 2281
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.1722668746373794e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.3265843636459775
          entropy_coeff: 0.009999999999999998
          kl: 0.013205567373849666
          policy_loss: -0.09626515044106378
          total_loss: -0.09551227821244133
          vf_explained_var: 0.37838804721832275
          vf_loss: 0.014018715659363402
    num_agent_steps_sampled: 661000
    num_agent_steps_trained: 661000
    num_steps_sampled: 661000
    num_steps_trained: 661

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,661,16678.7,661000,-2.7935,-2.26,-3.41,279.35


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 662000
  custom_metrics: {}
  date: 2021-10-24_12-46-18
  done: false
  episode_len_mean: 279.35
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.793499999999984
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 3
  episodes_total: 2284
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.1722668746373794e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.4320638683107165
          entropy_coeff: 0.009999999999999998
          kl: 0.017614115931357963
          policy_loss: -0.09246507651276059
          total_loss: -0.09422650420003467
          vf_explained_var: 0.5254421234130859
          vf_loss: 0.012559211005767186
    num_agent_steps_sampled: 662000
    num_agent_steps_trained: 662000
    num_steps_sampled: 662000
    num_steps_trained: 66200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,662,16700.5,662000,-2.7935,-2.26,-3.41,279.35


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 663000
  custom_metrics: {}
  date: 2021-10-24_12-46-43
  done: false
  episode_len_mean: 279.65
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.796499999999984
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 2288
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.1722668746373794e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.3669948299725851
          entropy_coeff: 0.009999999999999998
          kl: 0.026222732902481817
          policy_loss: 0.037680919013089606
          total_loss: 0.03403605471054713
          vf_explained_var: 0.5756799578666687
          vf_loss: 0.010025084743069278
    num_agent_steps_sampled: 663000
    num_agent_steps_trained: 663000
    num_steps_sampled: 663000
    num_steps_trained: 663000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,663,16725.1,663000,-2.7965,-2.26,-3.41,279.65


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 664000
  custom_metrics: {}
  date: 2021-10-24_12-47-06
  done: false
  episode_len_mean: 280.24
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.802399999999985
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 2292
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.758400311956069e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.255239889356825
          entropy_coeff: 0.009999999999999998
          kl: 0.010726406921342817
          policy_loss: 0.0332606499393781
          total_loss: 0.029626402424441443
          vf_explained_var: 0.7195810079574585
          vf_loss: 0.008918150276359584
    num_agent_steps_sampled: 664000
    num_agent_steps_trained: 664000
    num_steps_sampled: 664000
    num_steps_trained: 664000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,664,16748.3,664000,-2.8024,-2.26,-3.41,280.24


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 665000
  custom_metrics: {}
  date: 2021-10-24_12-47-32
  done: false
  episode_len_mean: 279.47
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.794699999999984
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 3
  episodes_total: 2295
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.758400311956069e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8009226083755493
          entropy_coeff: 0.009999999999999998
          kl: 0.006266993275821174
          policy_loss: -0.08430158657332261
          total_loss: -0.08391512595117093
          vf_explained_var: 0.6895290613174438
          vf_loss: 0.008395684939912624
    num_agent_steps_sampled: 665000
    num_agent_steps_trained: 665000
    num_steps_sampled: 665000
    num_steps_trained: 665000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,665,16774.3,665000,-2.7947,-2.26,-3.41,279.47


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 666000
  custom_metrics: {}
  date: 2021-10-24_12-47-58
  done: false
  episode_len_mean: 279.22
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7921999999999843
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 2299
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.758400311956069e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.8631406181388431
          entropy_coeff: 0.009999999999999998
          kl: 0.009087731303622348
          policy_loss: -0.08888369703458415
          total_loss: -0.08585951858096652
          vf_explained_var: 0.47793057560920715
          vf_loss: 0.011655584836585655
    num_agent_steps_sampled: 666000
    num_agent_steps_trained: 666000
    num_steps_sampled: 666000
    num_steps_trained: 6660

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,666,16800.2,666000,-2.7922,-2.26,-3.41,279.22


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 667000
  custom_metrics: {}
  date: 2021-10-24_12-48-22
  done: false
  episode_len_mean: 279.29
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7928999999999835
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 4
  episodes_total: 2303
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.758400311956069e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9816585255993737
          entropy_coeff: 0.009999999999999998
          kl: 0.007655269264999968
          policy_loss: 0.05050495614608129
          total_loss: 0.050614555428425474
          vf_explained_var: 0.4857889413833618
          vf_loss: 0.00992618492907948
    num_agent_steps_sampled: 667000
    num_agent_steps_trained: 667000
    num_steps_sampled: 667000
    num_steps_trained: 667000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,667,16824,667000,-2.7929,-2.26,-3.41,279.29


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 668000
  custom_metrics: {}
  date: 2021-10-24_12-48-44
  done: false
  episode_len_mean: 279.61
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.7960999999999836
  episode_reward_min: -3.4099999999999713
  episodes_this_iter: 3
  episodes_total: 2306
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.758400311956069e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1305677904023064
          entropy_coeff: 0.009999999999999998
          kl: 0.02184580868566949
          policy_loss: -0.08950768990649118
          total_loss: -0.08763634430037605
          vf_explained_var: 0.30567532777786255
          vf_loss: 0.013177023859073718
    num_agent_steps_sampled: 668000
    num_agent_steps_trained: 668000
    num_steps_sampled: 668000
    num_steps_trained: 66800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,668,16846.4,668000,-2.7961,-2.26,-3.41,279.61




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 669000
  custom_metrics: {}
  date: 2021-10-24_12-49-24
  done: false
  episode_len_mean: 280.45
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.804499999999984
  episode_reward_min: -3.6699999999999657
  episodes_this_iter: 4
  episodes_total: 2310
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1637600467934101e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9839120772149827
          entropy_coeff: 0.009999999999999998
          kl: 0.013668877977607252
          policy_loss: 0.04975605706373851
          total_loss: 0.05025734901428223
          vf_explained_var: 0.5253056287765503
          vf_loss: 0.010340415365580056
    num_agent_steps_sampled: 669000
    num_agent_steps_trained: 669000
    num_steps_sampled: 669000
    num_steps_trained: 669000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,669,16886.1,669000,-2.8045,-2.26,-3.67,280.45


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 670000
  custom_metrics: {}
  date: 2021-10-24_12-49-49
  done: false
  episode_len_mean: 281.16
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.811599999999984
  episode_reward_min: -3.6699999999999657
  episodes_this_iter: 3
  episodes_total: 2313
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1637600467934101e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8341118249628279
          entropy_coeff: 0.009999999999999998
          kl: 0.027727895591233714
          policy_loss: -0.0381608200362987
          total_loss: -0.03255808409303427
          vf_explained_var: 0.2112812101840973
          vf_loss: 0.01394384942120976
    num_agent_steps_sampled: 670000
    num_agent_steps_trained: 670000
    num_steps_sampled: 670000
    num_steps_trained: 670000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,670,16911.2,670000,-2.8116,-2.26,-3.67,281.16


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 671000
  custom_metrics: {}
  date: 2021-10-24_12-50-11
  done: false
  episode_len_mean: 281.54
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.815399999999984
  episode_reward_min: -3.6699999999999657
  episodes_this_iter: 4
  episodes_total: 2317
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7456400701901155e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0546263794104258
          entropy_coeff: 0.009999999999999998
          kl: 0.02864804332013221
          policy_loss: 0.008858098917537265
          total_loss: 0.014790531661775377
          vf_explained_var: -0.02894674427807331
          vf_loss: 0.016478696589668593
    num_agent_steps_sampled: 671000
    num_agent_steps_trained: 671000
    num_steps_sampled: 671000
    num_steps_trained: 6710

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,671,16933.4,671000,-2.8154,-2.26,-3.67,281.54


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 672000
  custom_metrics: {}
  date: 2021-10-24_12-50-34
  done: false
  episode_len_mean: 282.69
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.826899999999983
  episode_reward_min: -3.6699999999999657
  episodes_this_iter: 3
  episodes_total: 2320
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.618460105285173e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1247735699017842
          entropy_coeff: 0.009999999999999998
          kl: 0.008628356765755048
          policy_loss: -0.03574186116456986
          total_loss: -0.035964604715506235
          vf_explained_var: -0.308349072933197
          vf_loss: 0.01102499150308884
    num_agent_steps_sampled: 672000
    num_agent_steps_trained: 672000
    num_steps_sampled: 672000
    num_steps_trained: 672000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,672,16956.7,672000,-2.8269,-2.4,-3.67,282.69


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 673000
  custom_metrics: {}
  date: 2021-10-24_12-50-58
  done: false
  episode_len_mean: 283.19
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.8318999999999837
  episode_reward_min: -3.6699999999999657
  episodes_this_iter: 4
  episodes_total: 2324
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.618460105285173e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9356706665621863
          entropy_coeff: 0.009999999999999998
          kl: 0.008078624107482252
          policy_loss: 0.020416374338997734
          total_loss: 0.02533302737606896
          vf_explained_var: 0.08448073267936707
          vf_loss: 0.0142733591194782
    num_agent_steps_sampled: 673000
    num_agent_steps_trained: 673000
    num_steps_sampled: 673000
    num_steps_trained: 673000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,673,16980.6,673000,-2.8319,-2.4,-3.67,283.19


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 674000
  custom_metrics: {}
  date: 2021-10-24_12-51-18
  done: false
  episode_len_mean: 286.18
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.861799999999983
  episode_reward_min: -3.799999999999963
  episodes_this_iter: 3
  episodes_total: 2327
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.618460105285173e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1865157670444912
          entropy_coeff: 0.009999999999999998
          kl: 0.012958969302216398
          policy_loss: 0.06297075127561887
          total_loss: 0.06116650957200262
          vf_explained_var: -0.10094909369945526
          vf_loss: 0.01006091253625022
    num_agent_steps_sampled: 674000
    num_agent_steps_trained: 674000
    num_steps_sampled: 674000
    num_steps_trained: 674000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,674,16999.8,674000,-2.8618,-2.4,-3.8,286.18


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 675000
  custom_metrics: {}
  date: 2021-10-24_12-51-38
  done: false
  episode_len_mean: 288.22
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.882199999999982
  episode_reward_min: -3.799999999999963
  episodes_this_iter: 2
  episodes_total: 2329
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.618460105285173e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2532319903373719
          entropy_coeff: 0.009999999999999998
          kl: 0.021603419460274824
          policy_loss: -0.08785949415630764
          total_loss: -0.08868541320164998
          vf_explained_var: -0.29877349734306335
          vf_loss: 0.01170640018535778
    num_agent_steps_sampled: 675000
    num_agent_steps_trained: 675000
    num_steps_sampled: 675000
    num_steps_trained: 675000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,675,17020.1,675000,-2.8822,-2.4,-3.8,288.22


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 676000
  custom_metrics: {}
  date: 2021-10-24_12-51-59
  done: false
  episode_len_mean: 289.49
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.894899999999982
  episode_reward_min: -3.799999999999963
  episodes_this_iter: 4
  episodes_total: 2333
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.92769015792776e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1867190851105585
          entropy_coeff: 0.009999999999999998
          kl: 0.015267517383276344
          policy_loss: 0.041991300880908966
          total_loss: 0.04344789998398887
          vf_explained_var: 0.05123494938015938
          vf_loss: 0.01332378718070686
    num_agent_steps_sampled: 676000
    num_agent_steps_trained: 676000
    num_steps_sampled: 676000
    num_steps_trained: 676000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,676,17041.4,676000,-2.8949,-2.4,-3.8,289.49


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 677000
  custom_metrics: {}
  date: 2021-10-24_12-52-21
  done: false
  episode_len_mean: 290.89
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.908899999999982
  episode_reward_min: -3.799999999999963
  episodes_this_iter: 3
  episodes_total: 2336
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.92769015792776e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1444707996315426
          entropy_coeff: 0.009999999999999998
          kl: 0.010204364475788688
          policy_loss: 0.06628366692198648
          total_loss: 0.06310528284973568
          vf_explained_var: 0.280841201543808
          vf_loss: 0.008266322415632505
    num_agent_steps_sampled: 677000
    num_agent_steps_trained: 677000
    num_steps_sampled: 677000
    num_steps_trained: 677000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,677,17063.3,677000,-2.9089,-2.4,-3.8,290.89


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 678000
  custom_metrics: {}
  date: 2021-10-24_12-52-40
  done: false
  episode_len_mean: 294.5
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.944999999999981
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 3
  episodes_total: 2339
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.92769015792776e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3902101437250773
          entropy_coeff: 0.009999999999999998
          kl: 0.010653968802863447
          policy_loss: 0.0476455678542455
          total_loss: 0.042275451040930216
          vf_explained_var: -0.02438880316913128
          vf_loss: 0.008531983113951154
    num_agent_steps_sampled: 678000
    num_agent_steps_trained: 678000
    num_steps_sampled: 678000
    num_steps_trained: 678000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,678,17081.9,678000,-2.945,-2.4,-3.9,294.5




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 679000
  custom_metrics: {}
  date: 2021-10-24_12-53-17
  done: false
  episode_len_mean: 295.59
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.955899999999981
  episode_reward_min: -3.899999999999961
  episodes_this_iter: 2
  episodes_total: 2341
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.92769015792776e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.304899197154575
          entropy_coeff: 0.009999999999999998
          kl: 0.01767788587791538
          policy_loss: -0.0378097555703587
          total_loss: -0.04091794358359443
          vf_explained_var: 0.5430017709732056
          vf_loss: 0.009940796899738619
    num_agent_steps_sampled: 679000
    num_agent_steps_trained: 679000
    num_steps_sampled: 679000
    num_steps_trained: 679000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,679,17118.9,679000,-2.9559,-2.4,-3.9,295.59


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 680000
  custom_metrics: {}
  date: 2021-10-24_12-53-39
  done: false
  episode_len_mean: 296.76
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9675999999999814
  episode_reward_min: -4.039999999999958
  episodes_this_iter: 4
  episodes_total: 2345
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.92769015792776e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0814211222860548
          entropy_coeff: 0.009999999999999998
          kl: 0.020853242150828975
          policy_loss: -0.024620591269599066
          total_loss: -0.02398169818851683
          vf_explained_var: 0.5199992060661316
          vf_loss: 0.011453098472621706
    num_agent_steps_sampled: 680000
    num_agent_steps_trained: 680000
    num_steps_sampled: 680000
    num_steps_trained: 680000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,680,17140.9,680000,-2.9676,-2.4,-4.04,296.76


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 681000
  custom_metrics: {}
  date: 2021-10-24_12-54-00
  done: false
  episode_len_mean: 296.77
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9676999999999807
  episode_reward_min: -4.039999999999958
  episodes_this_iter: 3
  episodes_total: 2348
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.89153523689164e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1644933144251506
          entropy_coeff: 0.009999999999999998
          kl: 0.022163974920401428
          policy_loss: 0.079897022081746
          total_loss: 0.07677146262592739
          vf_explained_var: 0.6239314079284668
          vf_loss: 0.00851936377132208
    num_agent_steps_sampled: 681000
    num_agent_steps_trained: 681000
    num_steps_sampled: 681000
    num_steps_trained: 681000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,681,17161.9,681000,-2.9677,-2.4,-4.04,296.77


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 682000
  custom_metrics: {}
  date: 2021-10-24_12-54-19
  done: false
  episode_len_mean: 297.98
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.979799999999981
  episode_reward_min: -4.039999999999958
  episodes_this_iter: 2
  episodes_total: 2350
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.837302855337459e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.5138048079278734
          entropy_coeff: 0.009999999999999998
          kl: 0.01480240889317612
          policy_loss: -0.04426650587055418
          total_loss: -0.049272267189290786
          vf_explained_var: 0.585063099861145
          vf_loss: 0.01013227536265428
    num_agent_steps_sampled: 682000
    num_agent_steps_trained: 682000
    num_steps_sampled: 682000
    num_steps_trained: 682000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,682,17181,682000,-2.9798,-2.4,-4.04,297.98


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 683000
  custom_metrics: {}
  date: 2021-10-24_12-54-39
  done: false
  episode_len_mean: 298.62
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9861999999999806
  episode_reward_min: -4.039999999999958
  episodes_this_iter: 3
  episodes_total: 2353
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.837302855337459e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.077889002694024
          entropy_coeff: 0.009999999999999998
          kl: 0.029407675178640288
          policy_loss: -0.0896350510004494
          total_loss: -0.09080894655651517
          vf_explained_var: 0.7179403305053711
          vf_loss: 0.009604973977224695
    num_agent_steps_sampled: 683000
    num_agent_steps_trained: 683000
    num_steps_sampled: 683000
    num_steps_trained: 683000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,683,17201.3,683000,-2.9862,-2.4,-4.04,298.62


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 684000
  custom_metrics: {}
  date: 2021-10-24_12-55-02
  done: false
  episode_len_mean: 299.31
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9930999999999788
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 4
  episodes_total: 2357
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3255954283006186e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8157607542143928
          entropy_coeff: 0.009999999999999998
          kl: 0.0046525778858813914
          policy_loss: 0.0668417404923174
          total_loss: 0.0687910166879495
          vf_explained_var: 0.591895580291748
          vf_loss: 0.010106878148184882
    num_agent_steps_sampled: 684000
    num_agent_steps_trained: 684000
    num_steps_sampled: 684000
    num_steps_trained: 684000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,684,17224.1,684000,-2.9931,-2.4,-4.2,299.31


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 685000
  custom_metrics: {}
  date: 2021-10-24_12-55-25
  done: false
  episode_len_mean: 298.94
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9893999999999794
  episode_reward_min: -4.199999999999955
  episodes_this_iter: 3
  episodes_total: 2360
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.627977141503093e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8548245661788516
          entropy_coeff: 0.009999999999999998
          kl: 0.006906738919604136
          policy_loss: 0.04663644010821978
          total_loss: 0.04639876791172558
          vf_explained_var: 0.5640754699707031
          vf_loss: 0.00831057280043347
    num_agent_steps_sampled: 685000
    num_agent_steps_trained: 685000
    num_steps_sampled: 685000
    num_steps_trained: 685000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,685,17247.2,685000,-2.9894,-2.4,-4.2,298.94


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 686000
  custom_metrics: {}
  date: 2021-10-24_12-55-47
  done: false
  episode_len_mean: 300.67
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.006699999999979
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 3
  episodes_total: 2363
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.627977141503093e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1854996350076463
          entropy_coeff: 0.009999999999999998
          kl: 0.016810023952988314
          policy_loss: -0.03082374913824929
          total_loss: -0.030562077545457415
          vf_explained_var: 0.38807761669158936
          vf_loss: 0.012116660552823708
    num_agent_steps_sampled: 686000
    num_agent_steps_trained: 686000
    num_steps_sampled: 686000
    num_steps_trained: 68600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,686,17268.9,686000,-3.0067,-2.4,-4.47,300.67


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 687000
  custom_metrics: {}
  date: 2021-10-24_12-56-08
  done: false
  episode_len_mean: 302.84
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.028399999999979
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 3
  episodes_total: 2366
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.627977141503093e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1954511271582708
          entropy_coeff: 0.009999999999999998
          kl: 0.013431696063073812
          policy_loss: -0.036797168850898745
          total_loss: -0.03720296306742562
          vf_explained_var: 0.38986408710479736
          vf_loss: 0.01154871255469819
    num_agent_steps_sampled: 687000
    num_agent_steps_trained: 687000
    num_steps_sampled: 687000
    num_steps_trained: 687000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,687,17290,687000,-3.0284,-2.4,-4.47,302.84


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 688000
  custom_metrics: {}
  date: 2021-10-24_12-56-28
  done: false
  episode_len_mean: 303.73
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.037299999999979
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 3
  episodes_total: 2369
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.627977141503093e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.209587154785792
          entropy_coeff: 0.009999999999999998
          kl: 0.014150531809036377
          policy_loss: -0.10796659423245324
          total_loss: -0.10417154129180643
          vf_explained_var: 0.10987730324268341
          vf_loss: 0.01589092193171382
    num_agent_steps_sampled: 688000
    num_agent_steps_trained: 688000
    num_steps_sampled: 688000
    num_steps_trained: 688000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,688,17310.4,688000,-3.0373,-2.4,-4.47,303.73




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 689000
  custom_metrics: {}
  date: 2021-10-24_12-57-08
  done: false
  episode_len_mean: 304.34
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.0433999999999792
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 3
  episodes_total: 2372
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.627977141503093e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1049692571163177
          entropy_coeff: 0.009999999999999998
          kl: 0.010206568710232962
          policy_loss: -0.12658136354552374
          total_loss: -0.12370792975028357
          vf_explained_var: 0.2797267735004425
          vf_loss: 0.013923117953042189
    num_agent_steps_sampled: 689000
    num_agent_steps_trained: 689000
    num_steps_sampled: 689000
    num_steps_trained: 689000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,689,17350.1,689000,-3.0434,-2.4,-4.47,304.34


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 690000
  custom_metrics: {}
  date: 2021-10-24_12-57-30
  done: false
  episode_len_mean: 306.47
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.064699999999978
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 4
  episodes_total: 2376
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.627977141503093e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.107328685786989
          entropy_coeff: 0.009999999999999998
          kl: 0.010279547122175358
          policy_loss: -0.022166218525833555
          total_loss: -0.02066626755727662
          vf_explained_var: 0.3912896513938904
          vf_loss: 0.012573231001281077
    num_agent_steps_sampled: 690000
    num_agent_steps_trained: 690000
    num_steps_sampled: 690000
    num_steps_trained: 690000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,690,17371.5,690000,-3.0647,-2.4,-4.47,306.47


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 691000
  custom_metrics: {}
  date: 2021-10-24_12-57-53
  done: false
  episode_len_mean: 306.12
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.0611999999999777
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 3
  episodes_total: 2379
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.627977141503093e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6969117638137605
          entropy_coeff: 0.009999999999999998
          kl: 0.005544989006634498
          policy_loss: 0.04020382389426232
          total_loss: 0.04101318990190824
          vf_explained_var: 0.5732313990592957
          vf_loss: 0.00777847981468464
    num_agent_steps_sampled: 691000
    num_agent_steps_trained: 691000
    num_steps_sampled: 691000
    num_steps_trained: 691000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,691,17394.9,691000,-3.0612,-2.4,-4.47,306.12


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 692000
  custom_metrics: {}
  date: 2021-10-24_12-58-17
  done: false
  episode_len_mean: 305.7
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.056999999999978
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 4
  episodes_total: 2383
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.627977141503093e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7543751259644826
          entropy_coeff: 0.009999999999999998
          kl: 0.01320955316315633
          policy_loss: -0.001638372325234943
          total_loss: 0.002480197532309426
          vf_explained_var: 0.3837321102619171
          vf_loss: 0.011662316073973973
    num_agent_steps_sampled: 692000
    num_agent_steps_trained: 692000
    num_steps_sampled: 692000
    num_steps_trained: 692000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,692,17418.6,692000,-3.057,-2.4,-4.47,305.7


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 693000
  custom_metrics: {}
  date: 2021-10-24_12-58-43
  done: false
  episode_len_mean: 304.35
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.043499999999978
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 4
  episodes_total: 2387
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.627977141503093e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6965287817849053
          entropy_coeff: 0.009999999999999998
          kl: 0.013129325121658693
          policy_loss: 0.00868890459338824
          total_loss: 0.015020179914103614
          vf_explained_var: 0.23249395191669464
          vf_loss: 0.013296556576258606
    num_agent_steps_sampled: 693000
    num_agent_steps_trained: 693000
    num_steps_sampled: 693000
    num_steps_trained: 693000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,693,17444.5,693000,-3.0435,-2.4,-4.47,304.35


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 694000
  custom_metrics: {}
  date: 2021-10-24_12-59-09
  done: false
  episode_len_mean: 303.15
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.0314999999999794
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 4
  episodes_total: 2391
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.627977141503093e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5147546976804733
          entropy_coeff: 0.009999999999999998
          kl: 0.004773034650999285
          policy_loss: -0.0029113153202666177
          total_loss: 0.006053624384933048
          vf_explained_var: 0.24546708166599274
          vf_loss: 0.014112484134319756
    num_agent_steps_sampled: 694000
    num_agent_steps_trained: 694000
    num_steps_sampled: 694000
    num_steps_trained: 69

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,694,17471.1,694000,-3.0315,-2.32,-4.47,303.15


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 695000
  custom_metrics: {}
  date: 2021-10-24_12-59-34
  done: false
  episode_len_mean: 302.72
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.027199999999979
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 3
  episodes_total: 2394
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3139885707515464e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7030450287792418
          entropy_coeff: 0.009999999999999998
          kl: 0.008071935710014107
          policy_loss: -0.09495823267433379
          total_loss: -0.08701351053184933
          vf_explained_var: 0.20368225872516632
          vf_loss: 0.014975172881450918
    num_agent_steps_sampled: 695000
    num_agent_steps_trained: 695000
    num_steps_sampled: 695000
    num_steps_trained: 6950

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,695,17496.3,695000,-3.0272,-2.32,-4.47,302.72


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 696000
  custom_metrics: {}
  date: 2021-10-24_13-00-00
  done: false
  episode_len_mean: 302.81
  episode_media: {}
  episode_reward_max: -2.3199999999999945
  episode_reward_mean: -3.0280999999999785
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 4
  episodes_total: 2398
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3139885707515464e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.704253496726354
          entropy_coeff: 0.009999999999999998
          kl: 0.005060121901322153
          policy_loss: -0.01491486132144928
          total_loss: -0.008544496612416373
          vf_explained_var: 0.2807612121105194
          vf_loss: 0.013412899503277408
    num_agent_steps_sampled: 696000
    num_agent_steps_trained: 696000
    num_steps_sampled: 696000
    num_steps_trained: 6960

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,696,17521.9,696000,-3.0281,-2.32,-4.47,302.81




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 697000
  custom_metrics: {}
  date: 2021-10-24_13-00-45
  done: false
  episode_len_mean: 302.58
  episode_media: {}
  episode_reward_max: -2.299999999999995
  episode_reward_mean: -3.025799999999979
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 4
  episodes_total: 2402
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3139885707515464e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6015043516953786
          entropy_coeff: 0.009999999999999998
          kl: 0.003046991804488286
          policy_loss: -0.02757163950138622
          total_loss: -0.02110407559408082
          vf_explained_var: 0.37214502692222595
          vf_loss: 0.01248260640228788
    num_agent_steps_sampled: 697000
    num_agent_steps_trained: 697000
    num_steps_sampled: 697000
    num_steps_trained: 697000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,697,17566.7,697000,-3.0258,-2.3,-4.47,302.58


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 698000
  custom_metrics: {}
  date: 2021-10-24_13-01-12
  done: false
  episode_len_mean: 300.48
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -3.00479999999998
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 4
  episodes_total: 2406
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6569942853757732e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6959113912449942
          entropy_coeff: 0.009999999999999998
          kl: 0.0067895412525156
          policy_loss: -0.08839452274971538
          total_loss: -0.0774703679813279
          vf_explained_var: 0.17896434664726257
          vf_loss: 0.017883265908393595
    num_agent_steps_sampled: 698000
    num_agent_steps_trained: 698000
    num_steps_sampled: 698000
    num_steps_trained: 698000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,698,17593.4,698000,-3.0048,-2.28,-4.47,300.48


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 699000
  custom_metrics: {}
  date: 2021-10-24_13-01-37
  done: false
  episode_len_mean: 299.72
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.9971999999999794
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 4
  episodes_total: 2410
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6569942853757732e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.901026044289271
          entropy_coeff: 0.009999999999999998
          kl: 0.010278168573454613
          policy_loss: 0.02936441285742654
          total_loss: 0.03318015866809421
          vf_explained_var: 0.23886436223983765
          vf_loss: 0.012826005400468906
    num_agent_steps_sampled: 699000
    num_agent_steps_trained: 699000
    num_steps_sampled: 699000
    num_steps_trained: 699000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,699,17618.5,699000,-2.9972,-2.28,-4.47,299.72


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 700000
  custom_metrics: {}
  date: 2021-10-24_13-02-01
  done: false
  episode_len_mean: 298.68
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.98679999999998
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 4
  episodes_total: 2414
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6569942853757732e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.795767472518815
          entropy_coeff: 0.009999999999999998
          kl: 0.013556857693162092
          policy_loss: 0.055104067342148885
          total_loss: 0.057813585963514116
          vf_explained_var: 0.3915731608867645
          vf_loss: 0.010667190897381967
    num_agent_steps_sampled: 700000
    num_agent_steps_trained: 700000
    num_steps_sampled: 700000
    num_steps_trained: 700000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,700,17643,700000,-2.9868,-2.28,-4.47,298.68


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 701000
  custom_metrics: {}
  date: 2021-10-24_13-02-28
  done: false
  episode_len_mean: 297.02
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.9701999999999806
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 4
  episodes_total: 2418
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6569942853757732e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.620966363284323
          entropy_coeff: 0.009999999999999998
          kl: 0.01643206703052158
          policy_loss: 0.04568207272224956
          total_loss: 0.051875877711508006
          vf_explained_var: 0.3136634826660156
          vf_loss: 0.012403462651289172
    num_agent_steps_sampled: 701000
    num_agent_steps_trained: 701000
    num_steps_sampled: 701000
    num_steps_trained: 701000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,701,17669.6,701000,-2.9702,-2.28,-4.47,297.02


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 702000
  custom_metrics: {}
  date: 2021-10-24_13-02-54
  done: false
  episode_len_mean: 296.36
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.963599999999981
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 4
  episodes_total: 2422
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6569942853757732e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5537225643793742
          entropy_coeff: 0.009999999999999998
          kl: 0.012698087462744449
          policy_loss: 0.006350513382090463
          total_loss: 0.012638420363267263
          vf_explained_var: 0.40571945905685425
          vf_loss: 0.01182512915175822
    num_agent_steps_sampled: 702000
    num_agent_steps_trained: 702000
    num_steps_sampled: 702000
    num_steps_trained: 70200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,702,17695.8,702000,-2.9636,-2.28,-4.47,296.36


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 703000
  custom_metrics: {}
  date: 2021-10-24_13-03-17
  done: false
  episode_len_mean: 296.22
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.962199999999981
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 3
  episodes_total: 2425
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6569942853757732e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8536308252149158
          entropy_coeff: 0.009999999999999998
          kl: 0.04945867115530389
          policy_loss: 0.03472631739245521
          total_loss: 0.035186825692653655
          vf_explained_var: 0.5514708161354065
          vf_loss: 0.008996808169952904
    num_agent_steps_sampled: 703000
    num_agent_steps_trained: 703000
    num_steps_sampled: 703000
    num_steps_trained: 703000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,703,17719,703000,-2.9622,-2.28,-4.47,296.22


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 704000
  custom_metrics: {}
  date: 2021-10-24_13-03-42
  done: false
  episode_len_mean: 293.6
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.9359999999999813
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 3
  episodes_total: 2428
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4854914280636605e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8515385760201348
          entropy_coeff: 0.009999999999999998
          kl: 0.03253050042402587
          policy_loss: -0.10129815580116378
          total_loss: -0.09793127311600579
          vf_explained_var: 0.4969036877155304
          vf_loss: 0.011882261735283665
    num_agent_steps_sampled: 704000
    num_agent_steps_trained: 704000
    num_steps_sampled: 704000
    num_steps_trained: 704000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,704,17743.6,704000,-2.936,-2.28,-4.47,293.6




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 705000
  custom_metrics: {}
  date: 2021-10-24_13-04-21
  done: false
  episode_len_mean: 294.33
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.9432999999999807
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 3
  episodes_total: 2431
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.728237142095491e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3508150378863018
          entropy_coeff: 0.009999999999999998
          kl: 0.03564669454781987
          policy_loss: -0.10443119046588739
          total_loss: -0.10566044273889727
          vf_explained_var: 0.547081470489502
          vf_loss: 0.01227888990090125
    num_agent_steps_sampled: 705000
    num_agent_steps_trained: 705000
    num_steps_sampled: 705000
    num_steps_trained: 705000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,705,17783,705000,-2.9433,-2.28,-4.47,294.33


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 706000
  custom_metrics: {}
  date: 2021-10-24_13-04-42
  done: false
  episode_len_mean: 297.3
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.9729999999999808
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 3
  episodes_total: 2434
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.592355713143238e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.6185429679022896
          entropy_coeff: 0.009999999999999998
          kl: 0.048299672645290455
          policy_loss: -0.010106225146187676
          total_loss: -0.015906264384587605
          vf_explained_var: 0.58631831407547
          vf_loss: 0.010385363101441827
    num_agent_steps_sampled: 706000
    num_agent_steps_trained: 706000
    num_steps_sampled: 706000
    num_steps_trained: 706000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,706,17803.2,706000,-2.973,-2.28,-4.47,297.3


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 707000
  custom_metrics: {}
  date: 2021-10-24_13-05-05
  done: false
  episode_len_mean: 296.44
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.9643999999999813
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 3
  episodes_total: 2437
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.388533569714854e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1129027260674371
          entropy_coeff: 0.009999999999999998
          kl: 0.054620100640506306
          policy_loss: 0.03148876906683048
          total_loss: 0.02819307204335928
          vf_explained_var: 0.7123189568519592
          vf_loss: 0.007833289297478688
    num_agent_steps_sampled: 707000
    num_agent_steps_trained: 707000
    num_steps_sampled: 707000
    num_steps_trained: 707000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,707,17826.5,707000,-2.9644,-2.28,-4.47,296.44


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 708000
  custom_metrics: {}
  date: 2021-10-24_13-05-30
  done: false
  episode_len_mean: 293.09
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.930899999999981
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 4
  episodes_total: 2441
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2582800354572282e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8543703059355418
          entropy_coeff: 0.009999999999999998
          kl: 0.016300940532316035
          policy_loss: -0.004616777764426337
          total_loss: -0.0012238702840275235
          vf_explained_var: 0.4531201720237732
          vf_loss: 0.011936595099460748
    num_agent_steps_sampled: 708000
    num_agent_steps_trained: 708000
    num_steps_sampled: 708000
    num_steps_trained: 70

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,708,17851.4,708000,-2.9309,-2.28,-4.47,293.09


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 709000
  custom_metrics: {}
  date: 2021-10-24_13-05-55
  done: false
  episode_len_mean: 290.73
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.907299999999982
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 3
  episodes_total: 2444
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2582800354572282e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.781556557946735
          entropy_coeff: 0.009999999999999998
          kl: 0.023089114844715177
          policy_loss: -0.10047683094938596
          total_loss: -0.09604982940687073
          vf_explained_var: 0.30362287163734436
          vf_loss: 0.012242542621162202
    num_agent_steps_sampled: 709000
    num_agent_steps_trained: 709000
    num_steps_sampled: 709000
    num_steps_trained: 70900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,709,17876.7,709000,-2.9073,-2.28,-4.47,290.73


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 710000
  custom_metrics: {}
  date: 2021-10-24_13-06-21
  done: false
  episode_len_mean: 288.41
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.8840999999999815
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 4
  episodes_total: 2448
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8874200531858421e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6374001834127638
          entropy_coeff: 0.009999999999999998
          kl: 0.005078672628736077
          policy_loss: -0.06152964954574903
          total_loss: -0.056399935401148266
          vf_explained_var: 0.4003075361251831
          vf_loss: 0.01150371019418041
    num_agent_steps_sampled: 710000
    num_agent_steps_trained: 710000
    num_steps_sampled: 710000
    num_steps_trained: 7100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,710,17903,710000,-2.8841,-2.28,-4.47,288.41


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 711000
  custom_metrics: {}
  date: 2021-10-24_13-06-48
  done: false
  episode_len_mean: 283.96
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.8395999999999835
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 4
  episodes_total: 2452
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8874200531858421e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7278489132722219
          entropy_coeff: 0.009999999999999998
          kl: 0.0074352294731221645
          policy_loss: -0.03662136714491579
          total_loss: -0.031206702234016524
          vf_explained_var: 0.187672421336174
          vf_loss: 0.012693140769584312
    num_agent_steps_sampled: 711000
    num_agent_steps_trained: 711000
    num_steps_sampled: 711000
    num_steps_trained: 711

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,711,17929.7,711000,-2.8396,-2.28,-4.47,283.96


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 712000
  custom_metrics: {}
  date: 2021-10-24_13-07-15
  done: false
  episode_len_mean: 280.28
  episode_media: {}
  episode_reward_max: -2.2799999999999954
  episode_reward_mean: -2.8027999999999844
  episode_reward_min: -4.469999999999949
  episodes_this_iter: 5
  episodes_total: 2457
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8874200531858421e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.620860552125507
          entropy_coeff: 0.009999999999999998
          kl: 0.0068663210978544085
          policy_loss: -0.04031473985976643
          total_loss: -0.03227109354403284
          vf_explained_var: 0.2390076369047165
          vf_loss: 0.014252242130330868
    num_agent_steps_sampled: 712000
    num_agent_steps_trained: 712000
    num_steps_sampled: 712000
    num_steps_trained: 7120

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,712,17956.9,712000,-2.8028,-2.28,-4.47,280.28




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 713000
  custom_metrics: {}
  date: 2021-10-24_13-08-00
  done: false
  episode_len_mean: 277.03
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.770299999999984
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 4
  episodes_total: 2461
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8874200531858421e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5614939981036716
          entropy_coeff: 0.009999999999999998
          kl: 0.003784464807220742
          policy_loss: -0.024837554080618753
          total_loss: -0.019628986881838905
          vf_explained_var: 0.3001583218574524
          vf_loss: 0.010823501977655622
    num_agent_steps_sampled: 713000
    num_agent_steps_trained: 713000
    num_steps_sampled: 713000
    num_steps_trained: 71300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,713,18001.5,713000,-2.7703,-2.16,-4.41,277.03


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 714000
  custom_metrics: {}
  date: 2021-10-24_13-08-27
  done: false
  episode_len_mean: 274.33
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.743299999999985
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 4
  episodes_total: 2465
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.437100265929211e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5153956224521001
          entropy_coeff: 0.009999999999999998
          kl: 0.03831116796654628
          policy_loss: 0.009276993324359257
          total_loss: 0.015418316216932402
          vf_explained_var: 0.22047074139118195
          vf_loss: 0.011295245743046205
    num_agent_steps_sampled: 714000
    num_agent_steps_trained: 714000
    num_steps_sampled: 714000
    num_steps_trained: 714000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,714,18028.9,714000,-2.7433,-2.16,-4.41,274.33


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 715000
  custom_metrics: {}
  date: 2021-10-24_13-08-55
  done: false
  episode_len_mean: 271.02
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.710199999999986
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 4
  episodes_total: 2469
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4155650398893818e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.37339746952056885
          entropy_coeff: 0.009999999999999998
          kl: 0.010865104747526604
          policy_loss: 0.009880199159185091
          total_loss: 0.017667896176377932
          vf_explained_var: 0.11281871050596237
          vf_loss: 0.011521660526179605
    num_agent_steps_sampled: 715000
    num_agent_steps_trained: 715000
    num_steps_sampled: 715000
    num_steps_trained: 71500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,715,18056.3,715000,-2.7102,-2.16,-4.41,271.02


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 716000
  custom_metrics: {}
  date: 2021-10-24_13-09-20
  done: false
  episode_len_mean: 268.73
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.687299999999987
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 4
  episodes_total: 2473
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4155650398893818e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.43092196153269874
          entropy_coeff: 0.009999999999999998
          kl: 0.007834448693397198
          policy_loss: 0.030897550367646746
          total_loss: 0.03823306866818004
          vf_explained_var: 0.26243579387664795
          vf_loss: 0.011644729050911136
    num_agent_steps_sampled: 716000
    num_agent_steps_trained: 716000
    num_steps_sampled: 716000
    num_steps_trained: 716000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,716,18081.7,716000,-2.6873,-2.16,-4.41,268.73


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 717000
  custom_metrics: {}
  date: 2021-10-24_13-09-48
  done: false
  episode_len_mean: 265.89
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.658899999999987
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 4
  episodes_total: 2477
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4155650398893818e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.37498671578036413
          entropy_coeff: 0.009999999999999998
          kl: 0.0068055665615753425
          policy_loss: 0.040349290354384316
          total_loss: 0.04640930742025375
          vf_explained_var: 0.20643024146556854
          vf_loss: 0.00980987705083357
    num_agent_steps_sampled: 717000
    num_agent_steps_trained: 717000
    num_steps_sampled: 717000
    num_steps_trained: 717000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,717,18109.1,717000,-2.6589,-2.16,-4.41,265.89


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 718000
  custom_metrics: {}
  date: 2021-10-24_13-10-14
  done: false
  episode_len_mean: 264.21
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.6420999999999872
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 4
  episodes_total: 2481
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4155650398893818e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.46363496018780603
          entropy_coeff: 0.009999999999999998
          kl: 0.007347821646433298
          policy_loss: 0.007460074126720428
          total_loss: 0.016665391623973846
          vf_explained_var: 0.08292820304632187
          vf_loss: 0.013841661852267054
    num_agent_steps_sampled: 718000
    num_agent_steps_trained: 718000
    num_steps_sampled: 718000
    num_steps_trained: 7180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,718,18135.8,718000,-2.6421,-2.16,-4.41,264.21


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 719000
  custom_metrics: {}
  date: 2021-10-24_13-10-43
  done: false
  episode_len_mean: 262.9
  episode_media: {}
  episode_reward_max: -2.159999999999998
  episode_reward_mean: -2.628999999999988
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 4
  episodes_total: 2485
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4155650398893818e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.3484570168786579
          entropy_coeff: 0.009999999999999998
          kl: 0.0018087690335519824
          policy_loss: -0.1055647258957227
          total_loss: -0.09374508915676011
          vf_explained_var: 0.11918091028928757
          vf_loss: 0.015304205618384812
    num_agent_steps_sampled: 719000
    num_agent_steps_trained: 719000
    num_steps_sampled: 719000
    num_steps_trained: 719000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,719,18164.4,719000,-2.629,-2.16,-4.41,262.9




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 720000
  custom_metrics: {}
  date: 2021-10-24_13-11-26
  done: false
  episode_len_mean: 262.09
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.6208999999999882
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 5
  episodes_total: 2490
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.077825199446909e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.3415470924642351
          entropy_coeff: 0.009999999999999998
          kl: 0.001668818060844362
          policy_loss: 0.00584245953294966
          total_loss: 0.01532142899102635
          vf_explained_var: 0.20323583483695984
          vf_loss: 0.012894441860003604
    num_agent_steps_sampled: 720000
    num_agent_steps_trained: 720000
    num_steps_sampled: 720000
    num_steps_trained: 720000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,720,18207.6,720000,-2.6209,-1.99,-4.41,262.09


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 721000
  custom_metrics: {}
  date: 2021-10-24_13-11-56
  done: false
  episode_len_mean: 261.01
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.610099999999987
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 4
  episodes_total: 2494
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5389125997234545e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.4604280768169297
          entropy_coeff: 0.009999999999999998
          kl: 0.03751031074204418
          policy_loss: 0.052360063874059254
          total_loss: 0.06068906966182921
          vf_explained_var: 0.18021507561206818
          vf_loss: 0.012933274420599142
    num_agent_steps_sampled: 721000
    num_agent_steps_trained: 721000
    num_steps_sampled: 721000
    num_steps_trained: 721000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,721,18237.3,721000,-2.6101,-1.99,-4.41,261.01


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 722000
  custom_metrics: {}
  date: 2021-10-24_13-12-23
  done: false
  episode_len_mean: 260.42
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.604199999999988
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 4
  episodes_total: 2498
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.308368899585182e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.4335237021247546
          entropy_coeff: 0.009999999999999998
          kl: 0.005912906228310375
          policy_loss: -0.0007727026111549801
          total_loss: 0.007414617389440536
          vf_explained_var: 0.23549114167690277
          vf_loss: 0.012522558153917392
    num_agent_steps_sampled: 722000
    num_agent_steps_trained: 722000
    num_steps_sampled: 722000
    num_steps_trained: 7220

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,722,18264.2,722000,-2.6042,-1.99,-4.41,260.42


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 723000
  custom_metrics: {}
  date: 2021-10-24_13-12-51
  done: false
  episode_len_mean: 259.74
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.597399999999988
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 4
  episodes_total: 2502
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.308368899585182e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.2690825356377496
          entropy_coeff: 0.009999999999999998
          kl: 0.01381334788237029
          policy_loss: -0.10509798460536533
          total_loss: -0.0929246999323368
          vf_explained_var: 0.11378144472837448
          vf_loss: 0.014864101871434185
    num_agent_steps_sampled: 723000
    num_agent_steps_trained: 723000
    num_steps_sampled: 723000
    num_steps_trained: 723000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,723,18292.1,723000,-2.5974,-1.99,-4.41,259.74


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 724000
  custom_metrics: {}
  date: 2021-10-24_13-13-18
  done: false
  episode_len_mean: 259.03
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.590299999999989
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 5
  episodes_total: 2507
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.308368899585182e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.3131838858127594
          entropy_coeff: 0.009999999999999998
          kl: 0.004519102367548713
          policy_loss: 0.027957239747047426
          total_loss: 0.03880959037277434
          vf_explained_var: 0.29131975769996643
          vf_loss: 0.013984187785536051
    num_agent_steps_sampled: 724000
    num_agent_steps_trained: 724000
    num_steps_sampled: 724000
    num_steps_trained: 724000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,724,18319.4,724000,-2.5903,-1.99,-4.41,259.03


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 725000
  custom_metrics: {}
  date: 2021-10-24_13-13-46
  done: false
  episode_len_mean: 257.49
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.574899999999989
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 4
  episodes_total: 2511
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.654184449792591e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.3636763580971294
          entropy_coeff: 0.009999999999999998
          kl: 0.041348430501617434
          policy_loss: 0.040273305442598134
          total_loss: 0.049459459549850884
          vf_explained_var: 0.12170927226543427
          vf_loss: 0.012822910108500056
    num_agent_steps_sampled: 725000
    num_agent_steps_trained: 725000
    num_steps_sampled: 725000
    num_steps_trained: 725000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,725,18347.3,725000,-2.5749,-1.99,-4.41,257.49


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 726000
  custom_metrics: {}
  date: 2021-10-24_13-14-15
  done: false
  episode_len_mean: 256.09
  episode_media: {}
  episode_reward_max: -1.9900000000000015
  episode_reward_mean: -2.5608999999999895
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 4
  episodes_total: 2515
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.981276674688887e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.28238687101337645
          entropy_coeff: 0.009999999999999998
          kl: 0.0018241385211248307
          policy_loss: 0.00911423158314493
          total_loss: 0.018973498377535078
          vf_explained_var: 0.09580014646053314
          vf_loss: 0.012683135653949447
    num_agent_steps_sampled: 726000
    num_agent_steps_trained: 726000
    num_steps_sampled: 726000
    num_steps_trained: 7260

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,726,18376.1,726000,-2.5609,-1.99,-4.41,256.09




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 727000
  custom_metrics: {}
  date: 2021-10-24_13-15-00
  done: false
  episode_len_mean: 254.73
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.5472999999999892
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 5
  episodes_total: 2520
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9906383373444434e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.29772700783279205
          entropy_coeff: 0.009999999999999998
          kl: 0.0016425843385655349
          policy_loss: -0.016293339596854316
          total_loss: -0.0031236186623573303
          vf_explained_var: 0.08032775670289993
          vf_loss: 0.01614698690051834
    num_agent_steps_sampled: 727000
    num_agent_steps_trained: 727000
    num_steps_sampled: 727000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,727,18420.8,727000,-2.5473,-1.98,-4.41,254.73


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 728000
  custom_metrics: {}
  date: 2021-10-24_13-15-30
  done: false
  episode_len_mean: 252.8
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.52799999999999
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 4
  episodes_total: 2524
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.953191686722217e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.27092987100283306
          entropy_coeff: 0.009999999999999998
          kl: 0.001157054161027165
          policy_loss: -0.0022957112226221297
          total_loss: 0.007308304889334573
          vf_explained_var: 0.12233904004096985
          vf_loss: 0.012313313088897201
    num_agent_steps_sampled: 728000
    num_agent_steps_trained: 728000
    num_steps_sampled: 728000
    num_steps_trained: 72800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,728,18451.1,728000,-2.528,-1.98,-4.41,252.8


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 729000
  custom_metrics: {}
  date: 2021-10-24_13-15-58
  done: false
  episode_len_mean: 249.33
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.4932999999999907
  episode_reward_min: -4.40999999999995
  episodes_this_iter: 5
  episodes_total: 2529
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.9765958433611085e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.30689094629552627
          entropy_coeff: 0.009999999999999998
          kl: 0.00356598880113476
          policy_loss: -0.01415611199206776
          total_loss: -0.0027699299156665803
          vf_explained_var: 0.34220683574676514
          vf_loss: 0.014455090618381898
    num_agent_steps_sampled: 729000
    num_agent_steps_trained: 729000
    num_steps_sampled: 729000
    num_steps_trained: 72

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,729,18479,729000,-2.4933,-1.98,-4.41,249.33


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 730000
  custom_metrics: {}
  date: 2021-10-24_13-16-27
  done: false
  episode_len_mean: 243.52
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.435199999999992
  episode_reward_min: -4.059999999999958
  episodes_this_iter: 4
  episodes_total: 2533
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4882979216805542e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.25263450824552114
          entropy_coeff: 0.009999999999999998
          kl: 0.002021329348860787
          policy_loss: 0.051182399110661615
          total_loss: 0.05912205937008063
          vf_explained_var: 0.19455689191818237
          vf_loss: 0.010466004194070896
    num_agent_steps_sampled: 730000
    num_agent_steps_trained: 730000
    num_steps_sampled: 730000
    num_steps_trained: 7300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,730,18507.8,730000,-2.4352,-1.98,-4.06,243.52


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 731000
  custom_metrics: {}
  date: 2021-10-24_13-16-57
  done: false
  episode_len_mean: 239.18
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3917999999999924
  episode_reward_min: -2.9899999999999802
  episodes_this_iter: 4
  episodes_total: 2537
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2441489608402771e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.2830946574608485
          entropy_coeff: 0.009999999999999998
          kl: 0.004109517346051443
          policy_loss: -0.025360843042532603
          total_loss: -0.013862473848793242
          vf_explained_var: 0.11927098035812378
          vf_loss: 0.014329318422824144
    num_agent_steps_sampled: 731000
    num_agent_steps_trained: 731000
    num_steps_sampled: 731000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,731,18538.4,731000,-2.3918,-1.98,-2.99,239.18


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 732000
  custom_metrics: {}
  date: 2021-10-24_13-17-27
  done: false
  episode_len_mean: 236.97
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.369699999999993
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 5
  episodes_total: 2542
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.220744804201386e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.27968994196918273
          entropy_coeff: 0.009999999999999998
          kl: 0.003267406585911519
          policy_loss: 0.017877554148435594
          total_loss: 0.03161599876152144
          vf_explained_var: 0.1629941314458847
          vf_loss: 0.016535345133807923
    num_agent_steps_sampled: 732000
    num_agent_steps_trained: 732000
    num_steps_sampled: 732000
    num_steps_trained: 73200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,732,18568.1,732000,-2.3697,-1.98,-2.8,236.97


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 733000
  custom_metrics: {}
  date: 2021-10-24_13-17-57
  done: false
  episode_len_mean: 235.75
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3574999999999933
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 4
  episodes_total: 2546
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.110372402100693e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.3457718524667952
          entropy_coeff: 0.009999999999999998
          kl: 0.0034585312306168545
          policy_loss: 0.0036142420851522023
          total_loss: 0.013562400887409846
          vf_explained_var: 0.16145938634872437
          vf_loss: 0.013405876772271261
    num_agent_steps_sampled: 733000
    num_agent_steps_trained: 733000
    num_steps_sampled: 733000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,733,18598.4,733000,-2.3575,-1.98,-2.8,235.75




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 734000
  custom_metrics: {}
  date: 2021-10-24_13-18-44
  done: false
  episode_len_mean: 234.39
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3438999999999934
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 5
  episodes_total: 2551
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5551862010503464e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.3474232600794898
          entropy_coeff: 0.009999999999999998
          kl: 0.004100673070052629
          policy_loss: -0.027904454867045084
          total_loss: -0.015520770682228936
          vf_explained_var: 0.33496925234794617
          vf_loss: 0.015857918239716027
    num_agent_steps_sampled: 734000
    num_agent_steps_trained: 734000
    num_steps_sampled: 734000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,734,18645.1,734000,-2.3439,-1.98,-2.8,234.39


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 735000
  custom_metrics: {}
  date: 2021-10-24_13-19-13
  done: false
  episode_len_mean: 234.12
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.341199999999994
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 4
  episodes_total: 2555
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.775931005251732e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.4695372412602107
          entropy_coeff: 0.009999999999999998
          kl: 0.02257139463682015
          policy_loss: -0.0590481345438295
          total_loss: -0.052309437634216416
          vf_explained_var: 0.31789901852607727
          vf_loss: 0.011434067661563555
    num_agent_steps_sampled: 735000
    num_agent_steps_trained: 735000
    num_steps_sampled: 735000
    num_steps_trained: 73500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,735,18674.2,735000,-2.3412,-1.98,-2.8,234.12


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 736000
  custom_metrics: {}
  date: 2021-10-24_13-19-43
  done: false
  episode_len_mean: 233.69
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.336899999999994
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 4
  episodes_total: 2559
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1663896507877595e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.30869272351264954
          entropy_coeff: 0.009999999999999998
          kl: 0.004234260845861668
          policy_loss: 0.03280370773540603
          total_loss: 0.03834326499038272
          vf_explained_var: 0.4185022711753845
          vf_loss: 0.008626484136200614
    num_agent_steps_sampled: 736000
    num_agent_steps_trained: 736000
    num_steps_sampled: 736000
    num_steps_trained: 73600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,736,18703.7,736000,-2.3369,-1.98,-2.8,233.69


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 737000
  custom_metrics: {}
  date: 2021-10-24_13-20-12
  done: false
  episode_len_mean: 233.49
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.334899999999994
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 4
  episodes_total: 2563
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.831948253938797e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.2783054012391302
          entropy_coeff: 0.009999999999999998
          kl: 0.0031981847356380654
          policy_loss: -0.01265493126379119
          total_loss: -0.0031768281426694657
          vf_explained_var: 0.21622681617736816
          vf_loss: 0.012261158310704762
    num_agent_steps_sampled: 737000
    num_agent_steps_trained: 737000
    num_steps_sampled: 737000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,737,18733.2,737000,-2.3349,-1.98,-2.8,233.49


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 738000
  custom_metrics: {}
  date: 2021-10-24_13-20-41
  done: false
  episode_len_mean: 233.49
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.334899999999994
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 5
  episodes_total: 2568
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9159741269693987e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.2934283567799462
          entropy_coeff: 0.009999999999999998
          kl: 0.009694084942469417
          policy_loss: -0.016954168677330017
          total_loss: -0.006723917689588335
          vf_explained_var: 0.3141724765300751
          vf_loss: 0.01316453292965889
    num_agent_steps_sampled: 738000
    num_agent_steps_trained: 738000
    num_steps_sampled: 738000
    num_steps_trained: 738

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,738,18761.7,738000,-2.3349,-1.98,-2.8,233.49


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 739000
  custom_metrics: {}
  date: 2021-10-24_13-21-09
  done: false
  episode_len_mean: 232.87
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.3286999999999947
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 4
  episodes_total: 2572
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9159741269693987e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.28308068248960705
          entropy_coeff: 0.009999999999999998
          kl: 0.06689669905240755
          policy_loss: 0.04975532533393966
          total_loss: 0.05774906269378132
          vf_explained_var: 0.306782990694046
          vf_loss: 0.010824544665714106
    num_agent_steps_sampled: 739000
    num_agent_steps_trained: 739000
    num_steps_sampled: 739000
    num_steps_trained: 739000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,739,18790.3,739000,-2.3287,-1.98,-2.8,232.87


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 740000
  custom_metrics: {}
  date: 2021-10-24_13-21-41
  done: false
  episode_len_mean: 232.17
  episode_media: {}
  episode_reward_max: -1.9800000000000015
  episode_reward_mean: -2.321699999999994
  episode_reward_min: -2.629999999999988
  episodes_this_iter: 4
  episodes_total: 2576
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.373961190454099e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.21697337345944512
          entropy_coeff: 0.009999999999999998
          kl: 0.0032209427583258805
          policy_loss: -0.012546434460414781
          total_loss: -0.004114104186495145
          vf_explained_var: 0.20152083039283752
          vf_loss: 0.010602063768439822
    num_agent_steps_sampled: 740000
    num_agent_steps_trained: 740000
    num_steps_sampled: 740000
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,740,18821.5,740000,-2.3217,-1.98,-2.63,232.17




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 741000
  custom_metrics: {}
  date: 2021-10-24_13-22-48
  done: false
  episode_len_mean: 231.14
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.3113999999999946
  episode_reward_min: -2.629999999999988
  episodes_this_iter: 5
  episodes_total: 2581
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1869805952270494e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.34002719273169835
          entropy_coeff: 0.009999999999999998
          kl: 0.008028359510222356
          policy_loss: 0.009719425191481909
          total_loss: 0.020611274904674955
          vf_explained_var: 0.24117238819599152
          vf_loss: 0.014292119991862112
    num_agent_steps_sampled: 741000
    num_agent_steps_trained: 741000
    num_steps_sampled: 741000
    num_steps_trained: 74

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,741,18888.5,741000,-2.3114,-1.9,-2.63,231.14


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 742000
  custom_metrics: {}
  date: 2021-10-24_13-23-26
  done: false
  episode_len_mean: 231.59
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.3158999999999943
  episode_reward_min: -2.629999999999988
  episodes_this_iter: 4
  episodes_total: 2585
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1869805952270494e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7556945863697264
          entropy_coeff: 0.009999999999999998
          kl: 0.04162426316165375
          policy_loss: 0.006250951439142227
          total_loss: 0.010988224463330375
          vf_explained_var: 0.38572439551353455
          vf_loss: 0.012294214508599705
    num_agent_steps_sampled: 742000
    num_agent_steps_trained: 742000
    num_steps_sampled: 742000
    num_steps_trained: 7420

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,742,18927.1,742000,-2.3159,-1.9,-2.63,231.59


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 743000
  custom_metrics: {}
  date: 2021-10-24_13-24-05
  done: false
  episode_len_mean: 231.34
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.313399999999995
  episode_reward_min: -2.629999999999988
  episodes_this_iter: 4
  episodes_total: 2589
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2804708928405743e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.3338983575503031
          entropy_coeff: 0.009999999999999998
          kl: 0.0045787871167463
          policy_loss: 0.009919178651438819
          total_loss: 0.016855604698260626
          vf_explained_var: 0.30027031898498535
          vf_loss: 0.010275412899338537
    num_agent_steps_sampled: 743000
    num_agent_steps_trained: 743000
    num_steps_sampled: 743000
    num_steps_trained: 743000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,743,18965.9,743000,-2.3134,-1.9,-2.63,231.34


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 744000
  custom_metrics: {}
  date: 2021-10-24_13-24-42
  done: false
  episode_len_mean: 231.6
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.3159999999999945
  episode_reward_min: -2.629999999999988
  episodes_this_iter: 4
  episodes_total: 2593
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6402354464202871e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.37602347350782817
          entropy_coeff: 0.009999999999999998
          kl: 0.04413677422289734
          policy_loss: -0.04083958570328024
          total_loss: -0.0345164997710122
          vf_explained_var: 0.25480490922927856
          vf_loss: 0.010083321140458186
    num_agent_steps_sampled: 744000
    num_agent_steps_trained: 744000
    num_steps_sampled: 744000
    num_steps_trained: 74400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,744,19002.6,744000,-2.316,-1.9,-2.63,231.6


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 745000
  custom_metrics: {}
  date: 2021-10-24_13-25-17
  done: false
  episode_len_mean: 231.36
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.3135999999999943
  episode_reward_min: -2.619999999999988
  episodes_this_iter: 5
  episodes_total: 2598
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.460353169630431e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.4111360354555978
          entropy_coeff: 0.009999999999999998
          kl: 0.01038859876152366
          policy_loss: -0.013300307840108872
          total_loss: -0.003127275655666987
          vf_explained_var: 0.17974716424942017
          vf_loss: 0.014284393636302815
    num_agent_steps_sampled: 745000
    num_agent_steps_trained: 745000
    num_steps_sampled: 745000
    num_steps_trained: 745

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,745,19038,745000,-2.3136,-1.9,-2.62,231.36


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 746000
  custom_metrics: {}
  date: 2021-10-24_13-25-52
  done: false
  episode_len_mean: 231.42
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.3141999999999943
  episode_reward_min: -2.619999999999988
  episodes_this_iter: 4
  episodes_total: 2602
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.460353169630431e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5119959698783026
          entropy_coeff: 0.009999999999999998
          kl: 0.07623870667466762
          policy_loss: 0.02277141511440277
          total_loss: 0.027340818776024713
          vf_explained_var: 0.1741880625486374
          vf_loss: 0.00968936432359947
    num_agent_steps_sampled: 746000
    num_agent_steps_trained: 746000
    num_steps_sampled: 746000
    num_steps_trained: 746000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,746,19073,746000,-2.3142,-1.9,-2.62,231.42


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 747000
  custom_metrics: {}
  date: 2021-10-24_13-26-23
  done: false
  episode_len_mean: 232.13
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.321299999999994
  episode_reward_min: -2.7999999999999843
  episodes_this_iter: 3
  episodes_total: 2605
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.690529754445646e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5498520877626207
          entropy_coeff: 0.009999999999999998
          kl: 0.02539504599902563
          policy_loss: -0.10039912056591775
          total_loss: -0.09282634763254059
          vf_explained_var: 0.07907922565937042
          vf_loss: 0.013071295794927412
    num_agent_steps_sampled: 747000
    num_agent_steps_trained: 747000
    num_steps_sampled: 747000
    num_steps_trained: 74700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,747,19103.9,747000,-2.3213,-1.9,-2.8,232.13


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 748000
  custom_metrics: {}
  date: 2021-10-24_13-26-49
  done: false
  episode_len_mean: 235.66
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.3565999999999936
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2609
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.53579463166847e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.848233065340254
          entropy_coeff: 0.009999999999999998
          kl: 0.02285917895013401
          policy_loss: 0.03910279952817493
          total_loss: 0.03985236701038149
          vf_explained_var: 0.13885919749736786
          vf_loss: 0.00923189802043554
    num_agent_steps_sampled: 748000
    num_agent_steps_trained: 748000
    num_steps_sampled: 748000
    num_steps_trained: 748000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,748,19130.1,748000,-2.3566,-1.9,-3.5,235.66




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 749000
  custom_metrics: {}
  date: 2021-10-24_13-27-39
  done: false
  episode_len_mean: 236.81
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.3680999999999934
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 2612
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.303691947502703e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.8128515263398488
          entropy_coeff: 0.009999999999999998
          kl: 0.07782516889532379
          policy_loss: 0.009613001263803905
          total_loss: 0.009843154168791241
          vf_explained_var: 0.3616488575935364
          vf_loss: 0.00835866611968312
    num_agent_steps_sampled: 749000
    num_agent_steps_trained: 749000
    num_steps_sampled: 749000
    num_steps_trained: 749000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,749,19179.8,749000,-2.3681,-1.9,-3.5,236.81


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 750000
  custom_metrics: {}
  date: 2021-10-24_13-28-15
  done: false
  episode_len_mean: 237.94
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.379399999999993
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2616
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2455537921254057e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7186119814713796
          entropy_coeff: 0.009999999999999998
          kl: 0.029584213771595893
          policy_loss: -0.01771413567993376
          total_loss: -0.014848877572351031
          vf_explained_var: 0.3650648891925812
          vf_loss: 0.01005137589139243
    num_agent_steps_sampled: 750000
    num_agent_steps_trained: 750000
    num_steps_sampled: 750000
    num_steps_trained: 7500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,750,19216.1,750000,-2.3794,-1.9,-3.5,237.94


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 751000
  custom_metrics: {}
  date: 2021-10-24_13-28-49
  done: false
  episode_len_mean: 239.31
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.3930999999999925
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2620
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8683306881881075e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6932403412130144
          entropy_coeff: 0.009999999999999998
          kl: 0.006036594008256636
          policy_loss: 0.011942655262019899
          total_loss: 0.014118546081913843
          vf_explained_var: 0.3602520823478699
          vf_loss: 0.009108292849527464
    num_agent_steps_sampled: 751000
    num_agent_steps_trained: 751000
    num_steps_sampled: 751000
    num_steps_trained: 751

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,751,19250.1,751000,-2.3931,-1.9,-3.5,239.31


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 752000
  custom_metrics: {}
  date: 2021-10-24_13-29-23
  done: false
  episode_len_mean: 240.23
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.4022999999999923
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2624
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8683306881881075e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6813027686542935
          entropy_coeff: 0.009999999999999998
          kl: 0.004732666509264207
          policy_loss: 0.02511299533976449
          total_loss: 0.027358344693978628
          vf_explained_var: 0.3678383231163025
          vf_loss: 0.009058377457161744
    num_agent_steps_sampled: 752000
    num_agent_steps_trained: 752000
    num_steps_sampled: 752000
    num_steps_trained: 7520

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,752,19283.8,752000,-2.4023,-1.9,-3.5,240.23


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 753000
  custom_metrics: {}
  date: 2021-10-24_13-29-56
  done: false
  episode_len_mean: 241.33
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.413299999999993
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2628
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.341653440940537e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.7473290999730428
          entropy_coeff: 0.009999999999999998
          kl: 0.07664752614220313
          policy_loss: 0.03341286347972022
          total_loss: 0.03537630778219965
          vf_explained_var: 0.3921436369419098
          vf_loss: 0.009436733446394403
    num_agent_steps_sampled: 753000
    num_agent_steps_trained: 753000
    num_steps_sampled: 753000
    num_steps_trained: 753000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,753,19316.9,753000,-2.4133,-1.9,-3.5,241.33


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 754000
  custom_metrics: {}
  date: 2021-10-24_13-30-28
  done: false
  episode_len_mean: 243.28
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.432799999999992
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2632
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4012480161410814e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7180367403560215
          entropy_coeff: 0.009999999999999998
          kl: 0.0064937984936095615
          policy_loss: -0.0007409773766994477
          total_loss: 0.003154847232831849
          vf_explained_var: 0.19901934266090393
          vf_loss: 0.011076189898368385
    num_agent_steps_sampled: 754000
    num_agent_steps_trained: 754000
    num_steps_sampled: 754000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,754,19348.5,754000,-2.4328,-1.9,-3.5,243.28


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 755000
  custom_metrics: {}
  date: 2021-10-24_13-30-59
  done: false
  episode_len_mean: 244.38
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.4437999999999915
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 3
  episodes_total: 2635
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4012480161410814e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.586394445432557
          entropy_coeff: 0.009999999999999998
          kl: 0.00816775360940838
          policy_loss: -0.04668936381737391
          total_loss: -0.043924236380391654
          vf_explained_var: 0.23302900791168213
          vf_loss: 0.008629072250591384
    num_agent_steps_sampled: 755000
    num_agent_steps_trained: 755000
    num_steps_sampled: 755000
    num_steps_trained: 755

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,755,19379.9,755000,-2.4438,-1.9,-3.5,244.38


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 756000
  custom_metrics: {}
  date: 2021-10-24_13-31-30
  done: false
  episode_len_mean: 246.43
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.4642999999999913
  episode_reward_min: -3.4999999999999694
  episodes_this_iter: 4
  episodes_total: 2639
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4012480161410814e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6161439309517542
          entropy_coeff: 0.009999999999999998
          kl: 0.005313964815480353
          policy_loss: 0.025017943647172716
          total_loss: 0.029816713763607873
          vf_explained_var: 0.23701100051403046
          vf_loss: 0.010960210176805655
    num_agent_steps_sampled: 756000
    num_agent_steps_trained: 756000
    num_steps_sampled: 756000
    num_steps_trained: 75

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,756,19410.1,756000,-2.4643,-1.9,-3.5,246.43




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 757000
  custom_metrics: {}
  date: 2021-10-24_13-32-08
  done: false
  episode_len_mean: 249.93
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.4992999999999905
  episode_reward_min: -4.41999999999995
  episodes_this_iter: 3
  episodes_total: 2642
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4012480161410814e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.7983363058831957
          entropy_coeff: 0.009999999999999998
          kl: 0.02803165247776936
          policy_loss: 0.06563841005166372
          total_loss: 0.06633047502901819
          vf_explained_var: 0.23324358463287354
          vf_loss: 0.008675425192793934
    num_agent_steps_sampled: 757000
    num_agent_steps_trained: 757000
    num_steps_sampled: 757000
    num_steps_trained: 757000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,757,19448.8,757000,-2.4993,-1.9,-4.42,249.93


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 758000
  custom_metrics: {}
  date: 2021-10-24_13-32-25
  done: false
  episode_len_mean: 255.83
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.5582999999999894
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 2
  episodes_total: 2644
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.101872024211622e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8755081170135074
          entropy_coeff: 0.009999999999999998
          kl: 0.006093214659101815
          policy_loss: 0.11595550874869029
          total_loss: 0.11336309661467871
          vf_explained_var: 0.19845062494277954
          vf_loss: 0.006162666886585713
    num_agent_steps_sampled: 758000
    num_agent_steps_trained: 758000
    num_steps_sampled: 758000
    num_steps_trained: 758000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,758,19465.4,758000,-2.5583,-1.9,-5.81,255.83


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 759000
  custom_metrics: {}
  date: 2021-10-24_13-32-41
  done: false
  episode_len_mean: 259.31
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.5930999999999886
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 1
  episodes_total: 2645
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.101872024211622e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.9186794996261597
          entropy_coeff: 0.009999999999999998
          kl: 0.013440080633313933
          policy_loss: -0.04931348636746406
          total_loss: -0.05136337305108706
          vf_explained_var: 0.08169073611497879
          vf_loss: 0.007136907854389089
    num_agent_steps_sampled: 759000
    num_agent_steps_trained: 759000
    num_steps_sampled: 759000
    num_steps_trained: 7590

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,759,19481.9,759000,-2.5931,-1.9,-5.81,259.31


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 760000
  custom_metrics: {}
  date: 2021-10-24_13-33-01
  done: false
  episode_len_mean: 266.48
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.6647999999999867
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2648
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.101872024211622e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0730706916915045
          entropy_coeff: 0.009999999999999998
          kl: 0.042433666356854007
          policy_loss: 0.035746959182951184
          total_loss: 0.036659413244989184
          vf_explained_var: -0.17057937383651733
          vf_loss: 0.011643161457808067
    num_agent_steps_sampled: 760000
    num_agent_steps_trained: 760000
    num_steps_sampled: 760000
    num_steps_trained: 760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,760,19501.2,760000,-2.6648,-1.9,-5.81,266.48


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 761000
  custom_metrics: {}
  date: 2021-10-24_13-33-26
  done: false
  episode_len_mean: 270.1
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.700999999999986
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2651
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1528080363174316e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.1104508969518874
          entropy_coeff: 0.009999999999999998
          kl: 0.007544871363589698
          policy_loss: 0.0206705870727698
          total_loss: 0.02207105921374427
          vf_explained_var: 0.24178585410118103
          vf_loss: 0.01250498055661511
    num_agent_steps_sampled: 761000
    num_agent_steps_trained: 761000
    num_steps_sampled: 761000
    num_steps_trained: 761000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,761,19526.7,761000,-2.701,-1.9,-5.81,270.1


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 762000
  custom_metrics: {}
  date: 2021-10-24_13-33-53
  done: false
  episode_len_mean: 272.88
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.728799999999985
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2654
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1528080363174316e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.1142286115222506
          entropy_coeff: 0.009999999999999998
          kl: 0.014812660928466908
          policy_loss: 0.028404869635899863
          total_loss: 0.028822789506779776
          vf_explained_var: 0.3174341320991516
          vf_loss: 0.011560204162055419
    num_agent_steps_sampled: 762000
    num_agent_steps_trained: 762000
    num_steps_sampled: 762000
    num_steps_trained: 76200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,762,19553.1,762000,-2.7288,-1.9,-5.81,272.88


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 763000
  custom_metrics: {}
  date: 2021-10-24_13-34-24
  done: false
  episode_len_mean: 275.45
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.754499999999985
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2657
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1528080363174316e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.0711478769779206
          entropy_coeff: 0.009999999999999998
          kl: 0.04563251397596954
          policy_loss: 0.08625362631347444
          total_loss: 0.08506436828109953
          vf_explained_var: 0.4716328978538513
          vf_loss: 0.009522222979770352
    num_agent_steps_sampled: 763000
    num_agent_steps_trained: 763000
    num_steps_sampled: 763000
    num_steps_trained: 763000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,763,19584.1,763000,-2.7545,-1.9,-5.81,275.45


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 764000
  custom_metrics: {}
  date: 2021-10-24_13-34-58
  done: false
  episode_len_mean: 276.6
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.765999999999985
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 4
  episodes_total: 2661
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.729212054476148e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8031575136714512
          entropy_coeff: 0.009999999999999998
          kl: 0.03675525883397025
          policy_loss: -0.012074589439564281
          total_loss: -0.007762835630112224
          vf_explained_var: 0.4425002336502075
          vf_loss: 0.012343328446149826
    num_agent_steps_sampled: 764000
    num_agent_steps_trained: 764000
    num_steps_sampled: 764000
    num_steps_trained: 764000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,764,19618.5,764000,-2.766,-1.9,-5.81,276.6


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 765000
  custom_metrics: {}
  date: 2021-10-24_13-35-25
  done: false
  episode_len_mean: 279.71
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.797099999999984
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2664
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.093818081714222e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.2726749963230557
          entropy_coeff: 0.009999999999999998
          kl: 0.09396598782594912
          policy_loss: -0.01967507708403799
          total_loss: -0.025786948452393214
          vf_explained_var: 0.5853976607322693
          vf_loss: 0.006614880847822254
    num_agent_steps_sampled: 765000
    num_agent_steps_trained: 765000
    num_steps_sampled: 765000
    num_steps_trained: 765000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,765,19645.1,765000,-2.7971,-1.9,-5.81,279.71


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 766000
  custom_metrics: {}
  date: 2021-10-24_13-35-55
  done: false
  episode_len_mean: 281.36
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.8135999999999837
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 2
  episodes_total: 2666
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0640727122571332e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5843869434462654
          entropy_coeff: 0.009999999999999998
          kl: 0.016323170330165344
          policy_loss: -0.08744880788856083
          total_loss: -0.09079172313213349
          vf_explained_var: -0.16085346043109894
          vf_loss: 0.012500954309426661
    num_agent_steps_sampled: 766000
    num_agent_steps_trained: 766000
    num_steps_sampled: 766000
    num_steps_trained: 76

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,766,19675.7,766000,-2.8136,-1.9,-5.81,281.36




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 767000
  custom_metrics: {}
  date: 2021-10-24_13-36-43
  done: false
  episode_len_mean: 284.95
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.849499999999984
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 4
  episodes_total: 2670
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0640727122571332e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5914654506577386
          entropy_coeff: 0.009999999999999998
          kl: 0.047263854740263234
          policy_loss: 0.017302289770709144
          total_loss: 0.008196293645434909
          vf_explained_var: 0.6183378100395203
          vf_loss: 0.006808658004997091
    num_agent_steps_sampled: 767000
    num_agent_steps_trained: 767000
    num_steps_sampled: 767000
    num_steps_trained: 76700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,767,19723.6,767000,-2.8495,-1.9,-5.81,284.95


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 768000
  custom_metrics: {}
  date: 2021-10-24_13-37-14
  done: false
  episode_len_mean: 287.72
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.877199999999982
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 2
  episodes_total: 2672
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5961090683857002e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.845238783624437
          entropy_coeff: 0.009999999999999998
          kl: 0.015226380668608385
          policy_loss: -0.10129604422383838
          total_loss: -0.11523924602402581
          vf_explained_var: 0.20396021008491516
          vf_loss: 0.004509181614008008
    num_agent_steps_sampled: 768000
    num_agent_steps_trained: 768000
    num_steps_sampled: 768000
    num_steps_trained: 76800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,768,19753.8,768000,-2.8772,-1.9,-5.81,287.72


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 769000
  custom_metrics: {}
  date: 2021-10-24_13-37-47
  done: false
  episode_len_mean: 291.17
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.9312999999999816
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 4
  episodes_total: 2676
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5961090683857002e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5502185146013896
          entropy_coeff: 0.009999999999999998
          kl: 0.019958271155723118
          policy_loss: -0.09276651077800327
          total_loss: -0.09851634816990959
          vf_explained_var: 0.886216402053833
          vf_loss: 0.00975235073775467
    num_agent_steps_sampled: 769000
    num_agent_steps_trained: 769000
    num_steps_sampled: 769000
    num_steps_trained: 769000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,769,19787.7,769000,-2.9313,-1.9,-5.81,291.17


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 770000
  custom_metrics: {}
  date: 2021-10-24_13-38-20
  done: false
  episode_len_mean: 294.78
  episode_media: {}
  episode_reward_max: -1.9000000000000015
  episode_reward_mean: -2.967399999999981
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2679
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5961090683857002e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.698550898498959
          entropy_coeff: 0.009999999999999998
          kl: 0.013691867303100342
          policy_loss: -0.0835406202202042
          total_loss: -0.09652119369970427
          vf_explained_var: 0.927364706993103
          vf_loss: 0.004004940059449937
    num_agent_steps_sampled: 770000
    num_agent_steps_trained: 770000
    num_steps_sampled: 770000
    num_steps_trained: 770000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,770,19819.9,770000,-2.9674,-1.9,-5.81,294.78


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 771000
  custom_metrics: {}
  date: 2021-10-24_13-38-53
  done: false
  episode_len_mean: 297.22
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -2.99179999999998
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 2
  episodes_total: 2681
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5961090683857002e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6432768755488925
          entropy_coeff: 0.009999999999999998
          kl: 0.009285331514020119
          policy_loss: -0.16741871949699189
          total_loss: -0.18092896288467777
          vf_explained_var: 0.9520081281661987
          vf_loss: 0.0029225203182755245
    num_agent_steps_sampled: 771000
    num_agent_steps_trained: 771000
    num_steps_sampled: 771000
    num_steps_trained: 771000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,771,19853.1,771000,-2.9918,-2.26,-5.81,297.22


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 772000
  custom_metrics: {}
  date: 2021-10-24_13-39-27
  done: false
  episode_len_mean: 300.19
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.0214999999999805
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 4
  episodes_total: 2685
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5961090683857002e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5853474497795106
          entropy_coeff: 0.009999999999999998
          kl: 0.03803835327645449
          policy_loss: -0.0009996000263426039
          total_loss: -0.012696083883444469
          vf_explained_var: 0.9154582619667053
          vf_loss: 0.004156992312831184
    num_agent_steps_sampled: 772000
    num_agent_steps_trained: 772000
    num_steps_sampled: 772000
    num_steps_trained: 772

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,772,19887,772000,-3.0215,-2.26,-5.81,300.19


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 773000
  custom_metrics: {}
  date: 2021-10-24_13-40-01
  done: false
  episode_len_mean: 302.77
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.057199999999979
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2688
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.39416360257855e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5267283704545762
          entropy_coeff: 0.009999999999999998
          kl: 0.016658260050860938
          policy_loss: 0.015156684070825576
          total_loss: 0.014098453356160058
          vf_explained_var: 0.7180027365684509
          vf_loss: 0.01420905365763853
    num_agent_steps_sampled: 773000
    num_agent_steps_trained: 773000
    num_steps_sampled: 773000
    num_steps_trained: 773000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,773,19921.1,773000,-3.0572,-2.26,-5.81,302.77


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 774000
  custom_metrics: {}
  date: 2021-10-24_13-40-40
  done: false
  episode_len_mean: 304.96
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.0790999999999786
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2691
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.39416360257855e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5557236591974895
          entropy_coeff: 0.009999999999999998
          kl: 0.028286458694804197
          policy_loss: 0.033071016354693304
          total_loss: 0.025237288574377695
          vf_explained_var: 0.6583817005157471
          vf_loss: 0.00772350924089551
    num_agent_steps_sampled: 774000
    num_agent_steps_trained: 774000
    num_steps_sampled: 774000
    num_steps_trained: 774000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,774,19959.8,774000,-3.0791,-2.26,-5.81,304.96


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 775000
  custom_metrics: {}
  date: 2021-10-24_13-41-14
  done: false
  episode_len_mean: 307.89
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.108399999999977
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2694
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.591245403867824e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6699435459242926
          entropy_coeff: 0.009999999999999998
          kl: 0.023494190448028165
          policy_loss: 0.018183358924256432
          total_loss: 0.005722740499509706
          vf_explained_var: 0.8583396077156067
          vf_loss: 0.00423881169473235
    num_agent_steps_sampled: 775000
    num_agent_steps_trained: 775000
    num_steps_sampled: 775000
    num_steps_trained: 775000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,775,19994.4,775000,-3.1084,-2.26,-5.81,307.89


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 776000
  custom_metrics: {}
  date: 2021-10-24_13-41-49
  done: false
  episode_len_mean: 309.8
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.1274999999999773
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2697
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3868681058017375e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5996949672698975
          entropy_coeff: 0.009999999999999998
          kl: 0.010982262362218817
          policy_loss: -0.14074429290162194
          total_loss: -0.150295546816455
          vf_explained_var: 0.7309721112251282
          vf_loss: 0.006445695423624582
    num_agent_steps_sampled: 776000
    num_agent_steps_trained: 776000
    num_steps_sampled: 776000
    num_steps_trained: 776000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,776,20028.7,776000,-3.1275,-2.26,-5.81,309.8




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 777000
  custom_metrics: {}
  date: 2021-10-24_13-42-40
  done: false
  episode_len_mean: 312.04
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.149899999999976
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 4
  episodes_total: 2701
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3868681058017375e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5775165888998244
          entropy_coeff: 0.009999999999999998
          kl: 0.010003643876723424
          policy_loss: 0.07225088675816854
          total_loss: 0.06262593964735667
          vf_explained_var: 0.7316579222679138
          vf_loss: 0.0061502180527895686
    num_agent_steps_sampled: 777000
    num_agent_steps_trained: 777000
    num_steps_sampled: 777000
    num_steps_trained: 777000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,777,20079.5,777000,-3.1499,-2.26,-5.81,312.04


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 778000
  custom_metrics: {}
  date: 2021-10-24_13-43-13
  done: false
  episode_len_mean: 313.73
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.1667999999999763
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2704
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3868681058017375e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.600071402390798
          entropy_coeff: 0.009999999999999998
          kl: 0.008727857193966947
          policy_loss: 0.025836302505599127
          total_loss: 0.013754149857494567
          vf_explained_var: 0.8259048461914062
          vf_loss: 0.003918561301866753
    num_agent_steps_sampled: 778000
    num_agent_steps_trained: 778000
    num_steps_sampled: 778000
    num_steps_trained: 778000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,778,20112.9,778000,-3.1668,-2.26,-5.81,313.73


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 779000
  custom_metrics: {}
  date: 2021-10-24_13-43-46
  done: false
  episode_len_mean: 313.72
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.1666999999999774
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2707
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3868681058017375e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5366630567444695
          entropy_coeff: 0.009999999999999998
          kl: 0.019341136285207245
          policy_loss: -0.13200181755754684
          total_loss: -0.14018076575464672
          vf_explained_var: 0.7000134587287903
          vf_loss: 0.007187680398217506
    num_agent_steps_sampled: 779000
    num_agent_steps_trained: 779000
    num_steps_sampled: 779000
    num_steps_trained: 77900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,779,20145.6,779000,-3.1667,-2.26,-5.81,313.72


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 780000
  custom_metrics: {}
  date: 2021-10-24_13-44-18
  done: false
  episode_len_mean: 314.25
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.171999999999976
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 4
  episodes_total: 2711
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3868681058017375e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6405035191112094
          entropy_coeff: 0.009999999999999998
          kl: 0.016511541657382386
          policy_loss: 0.06822539079520437
          total_loss: 0.05911693792376253
          vf_explained_var: 0.716736376285553
          vf_loss: 0.007296584634524253
    num_agent_steps_sampled: 780000
    num_agent_steps_trained: 780000
    num_steps_sampled: 780000
    num_steps_trained: 780000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,780,20178,780000,-3.172,-2.26,-5.81,314.25


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 781000
  custom_metrics: {}
  date: 2021-10-24_13-44-49
  done: false
  episode_len_mean: 315.73
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.1867999999999754
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2714
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3868681058017375e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7242835720380147
          entropy_coeff: 0.009999999999999998
          kl: 0.012930128928887698
          policy_loss: 0.051116178184747695
          total_loss: 0.04013370532128546
          vf_explained_var: 0.725238025188446
          vf_loss: 0.006260360493454047
    num_agent_steps_sampled: 781000
    num_agent_steps_trained: 781000
    num_steps_sampled: 781000
    num_steps_trained: 781000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,781,20209.1,781000,-3.1868,-2.26,-5.81,315.73


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 782000
  custom_metrics: {}
  date: 2021-10-24_13-45-20
  done: false
  episode_len_mean: 317.56
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.205099999999976
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2717
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3868681058017375e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.7044996552997165
          entropy_coeff: 0.009999999999999998
          kl: 0.008635426339779784
          policy_loss: 0.0687801664074262
          total_loss: 0.05889159755574332
          vf_explained_var: 0.6374136805534363
          vf_loss: 0.007156427392813688
    num_agent_steps_sampled: 782000
    num_agent_steps_trained: 782000
    num_steps_sampled: 782000
    num_steps_trained: 782000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,782,20239.8,782000,-3.2051,-2.26,-5.81,317.56


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 783000
  custom_metrics: {}
  date: 2021-10-24_13-45-52
  done: false
  episode_len_mean: 319.6
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.225499999999975
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2720
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3868681058017375e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6524101376533509
          entropy_coeff: 0.009999999999999998
          kl: 0.012358280313648735
          policy_loss: 0.03740560900833872
          total_loss: 0.02899562335676617
          vf_explained_var: 0.5511165857315063
          vf_loss: 0.008114115354126422
    num_agent_steps_sampled: 783000
    num_agent_steps_trained: 783000
    num_steps_sampled: 783000
    num_steps_trained: 783000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,783,20272.1,783000,-3.2255,-2.26,-5.81,319.6


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 784000
  custom_metrics: {}
  date: 2021-10-24_13-46-23
  done: false
  episode_len_mean: 321.36
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2430999999999752
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2723
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3868681058017375e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6024370484881931
          entropy_coeff: 0.009999999999999998
          kl: 0.015802153460181264
          policy_loss: -0.10103229615423415
          total_loss: -0.1042971808049414
          vf_explained_var: 0.3550052344799042
          vf_loss: 0.012759481106574337
    num_agent_steps_sampled: 784000
    num_agent_steps_trained: 784000
    num_steps_sampled: 784000
    num_steps_trained: 784000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,784,20302.4,784000,-3.2431,-2.26,-5.81,321.36


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 785000
  custom_metrics: {}
  date: 2021-10-24_13-46-54
  done: false
  episode_len_mean: 323.16
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.261099999999974
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 4
  episodes_total: 2727
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3868681058017375e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6311918709013198
          entropy_coeff: 0.009999999999999998
          kl: 0.015683284698058998
          policy_loss: 0.0064287576410505505
          total_loss: 0.00019636986156304677
          vf_explained_var: 0.5756270885467529
          vf_loss: 0.01007953301175601
    num_agent_steps_sampled: 785000
    num_agent_steps_trained: 785000
    num_steps_sampled: 785000
    num_steps_trained: 7850

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,785,20334.2,785000,-3.2611,-2.26,-5.81,323.16




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 786000
  custom_metrics: {}
  date: 2021-10-24_13-47-43
  done: false
  episode_len_mean: 324.35
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2729999999999744
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2730
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3868681058017375e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6458908478418985
          entropy_coeff: 0.009999999999999998
          kl: 0.010261733764244517
          policy_loss: 0.0785612240433693
          total_loss: 0.07064186872707473
          vf_explained_var: 0.3915586769580841
          vf_loss: 0.008539552537129364
    num_agent_steps_sampled: 786000
    num_agent_steps_trained: 786000
    num_steps_sampled: 786000
    num_steps_trained: 786000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,786,20382.7,786000,-3.273,-2.26,-5.81,324.35


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 787000
  custom_metrics: {}
  date: 2021-10-24_13-48-17
  done: false
  episode_len_mean: 325.43
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.2837999999999745
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2733
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3868681058017375e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.660279683272044
          entropy_coeff: 0.009999999999999998
          kl: 0.00882566047660889
          policy_loss: 0.005619012481636472
          total_loss: -0.0012204719914330375
          vf_explained_var: 0.46324747800827026
          vf_loss: 0.009763310487485594
    num_agent_steps_sampled: 787000
    num_agent_steps_trained: 787000
    num_steps_sampled: 787000
    num_steps_trained: 7870

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,787,20416.4,787000,-3.2838,-2.26,-5.81,325.43


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 788000
  custom_metrics: {}
  date: 2021-10-24_13-48-48
  done: false
  episode_len_mean: 326.53
  episode_media: {}
  episode_reward_max: -2.259999999999996
  episode_reward_mean: -3.294799999999974
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 3
  episodes_total: 2736
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.3868681058017375e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6641796112060547
          entropy_coeff: 0.009999999999999998
          kl: 0.0817338523102617
          policy_loss: -0.1139512225985527
          total_loss: -0.1202177365620931
          vf_explained_var: 0.5141204595565796
          vf_loss: 0.010375282687083301
    num_agent_steps_sampled: 788000
    num_agent_steps_trained: 788000
    num_steps_sampled: 788000
    num_steps_trained: 788000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,788,20448.1,788000,-3.2948,-2.26,-5.81,326.53


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 789000
  custom_metrics: {}
  date: 2021-10-24_13-49-24
  done: false
  episode_len_mean: 328.83
  episode_media: {}
  episode_reward_max: -2.5199999999999902
  episode_reward_mean: -3.3177999999999725
  episode_reward_min: -5.809999999999921
  episodes_this_iter: 4
  episodes_total: 2740
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.080302158702608e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.5759545299741957
          entropy_coeff: 0.009999999999999998
          kl: 0.05994738577403354
          policy_loss: -0.07380693952242533
          total_loss: -0.0795178134408262
          vf_explained_var: 0.5875658988952637
          vf_loss: 0.010048667657085592
    num_agent_steps_sampled: 789000
    num_agent_steps_trained: 789000
    num_steps_sampled: 789000
    num_steps_trained: 789000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,789,20483.6,789000,-3.3178,-2.52,-5.81,328.83


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 790000
  custom_metrics: {}
  date: 2021-10-24_13-49-59
  done: false
  episode_len_mean: 323.66
  episode_media: {}
  episode_reward_max: -2.5199999999999902
  episode_reward_mean: -3.266099999999974
  episode_reward_min: -5.739999999999922
  episodes_this_iter: 3
  episodes_total: 2743
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2120453238053906e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.5506870455212063
          entropy_coeff: 0.009999999999999998
          kl: 0.019783780017461478
          policy_loss: -0.04727967547045814
          total_loss: -0.058337340669499506
          vf_explained_var: 0.852529764175415
          vf_loss: 0.004449203095605804
    num_agent_steps_sampled: 790000
    num_agent_steps_trained: 790000
    num_steps_sampled: 790000
    num_steps_trained: 79000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,790,20519.1,790000,-3.2661,-2.52,-5.74,323.66


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 791000
  custom_metrics: {}
  date: 2021-10-24_13-50-34
  done: false
  episode_len_mean: 316.71
  episode_media: {}
  episode_reward_max: -2.5199999999999902
  episode_reward_mean: -3.1965999999999766
  episode_reward_min: -4.779999999999963
  episodes_this_iter: 3
  episodes_total: 2746
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2120453238053906e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.4813173505995008
          entropy_coeff: 0.009999999999999998
          kl: 0.0148484169966603
          policy_loss: -0.1395120798713631
          total_loss: -0.148926112966405
          vf_explained_var: 0.8218781352043152
          vf_loss: 0.005399138837431868
    num_agent_steps_sampled: 791000
    num_agent_steps_trained: 791000
    num_steps_sampled: 791000
    num_steps_trained: 791000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,791,20553.9,791000,-3.1966,-2.52,-4.78,316.71


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 792000
  custom_metrics: {}
  date: 2021-10-24_13-51-05
  done: false
  episode_len_mean: 312.37
  episode_media: {}
  episode_reward_max: -2.5199999999999902
  episode_reward_mean: -3.153199999999976
  episode_reward_min: -4.779999999999963
  episodes_this_iter: 4
  episodes_total: 2750
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2120453238053906e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.4730863518185087
          entropy_coeff: 0.009999999999999998
          kl: 0.05758260811377222
          policy_loss: 0.06503781684570842
          total_loss: 0.05556300534970231
          vf_explained_var: 0.8143094182014465
          vf_loss: 0.005256048737404247
    num_agent_steps_sampled: 792000
    num_agent_steps_trained: 792000
    num_steps_sampled: 792000
    num_steps_trained: 792000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,792,20584.6,792000,-3.1532,-2.52,-4.78,312.37


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 793000
  custom_metrics: {}
  date: 2021-10-24_13-51-39
  done: false
  episode_len_mean: 311.15
  episode_media: {}
  episode_reward_max: -2.5199999999999902
  episode_reward_mean: -3.1409999999999774
  episode_reward_min: -4.779999999999963
  episodes_this_iter: 3
  episodes_total: 2753
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.818067985708087e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3426658471425374
          entropy_coeff: 0.009999999999999998
          kl: 0.009729036927925467
          policy_loss: -0.014905399746365018
          total_loss: -0.023176652027501
          vf_explained_var: 0.8114170432090759
          vf_loss: 0.0051554047683667805
    num_agent_steps_sampled: 793000
    num_agent_steps_trained: 793000
    num_steps_sampled: 793000
    num_steps_trained: 79300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,793,20618.6,793000,-3.141,-2.52,-4.78,311.15


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 794000
  custom_metrics: {}
  date: 2021-10-24_13-52-13
  done: false
  episode_len_mean: 309.9
  episode_media: {}
  episode_reward_max: -2.5199999999999902
  episode_reward_mean: -3.1284999999999767
  episode_reward_min: -4.779999999999963
  episodes_this_iter: 4
  episodes_total: 2757
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.818067985708087e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2779460456636218
          entropy_coeff: 0.009999999999999998
          kl: 0.026026834333702058
          policy_loss: -0.0059776623215940265
          total_loss: -0.011990679800510407
          vf_explained_var: 0.724766731262207
          vf_loss: 0.006766439938089914
    num_agent_steps_sampled: 794000
    num_agent_steps_trained: 794000
    num_steps_sampled: 794000
    num_steps_trained: 7940

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,794,20652.2,794000,-3.1285,-2.52,-4.78,309.9




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 795000
  custom_metrics: {}
  date: 2021-10-24_13-53-02
  done: false
  episode_len_mean: 310.51
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.134599999999977
  episode_reward_min: -4.779999999999963
  episodes_this_iter: 3
  episodes_total: 2760
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.72710197856213e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.4158738851547241
          entropy_coeff: 0.009999999999999998
          kl: 0.010075650585547812
          policy_loss: -0.04514281832509571
          total_loss: -0.05144121530983183
          vf_explained_var: 0.579245388507843
          vf_loss: 0.00786034255870618
    num_agent_steps_sampled: 795000
    num_agent_steps_trained: 795000
    num_steps_sampled: 795000
    num_steps_trained: 795000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,795,20702,795000,-3.1346,-2.4,-4.78,310.51


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 796000
  custom_metrics: {}
  date: 2021-10-24_13-53-38
  done: false
  episode_len_mean: 309.29
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.122399999999977
  episode_reward_min: -4.779999999999963
  episodes_this_iter: 4
  episodes_total: 2764
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.72710197856213e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.4346382273568048
          entropy_coeff: 0.009999999999999998
          kl: 0.012499310646487485
          policy_loss: 0.0004241079092025757
          total_loss: -0.005348840397265222
          vf_explained_var: 0.5478571057319641
          vf_loss: 0.008573432753069533
    num_agent_steps_sampled: 796000
    num_agent_steps_trained: 796000
    num_steps_sampled: 796000
    num_steps_trained: 796000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,796,20737.7,796000,-3.1224,-2.4,-4.78,309.29


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 797000
  custom_metrics: {}
  date: 2021-10-24_13-54-15
  done: false
  episode_len_mean: 307.75
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.1069999999999784
  episode_reward_min: -4.779999999999963
  episodes_this_iter: 3
  episodes_total: 2767
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.72710197856213e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.5267807417445713
          entropy_coeff: 0.009999999999999998
          kl: 0.01174347427880412
          policy_loss: 0.0008953005903297001
          total_loss: -0.0071014673345618775
          vf_explained_var: 0.45668724179267883
          vf_loss: 0.0072710392065346244
    num_agent_steps_sampled: 797000
    num_agent_steps_trained: 797000
    num_steps_sampled: 797000
    num_steps_trained: 797

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,797,20774.2,797000,-3.107,-2.4,-4.78,307.75


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 798000
  custom_metrics: {}
  date: 2021-10-24_13-54-47
  done: false
  episode_len_mean: 305.98
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.089299999999978
  episode_reward_min: -4.779999999999963
  episodes_this_iter: 4
  episodes_total: 2771
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.72710197856213e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3913313190142313
          entropy_coeff: 0.009999999999999998
          kl: 0.01093003230931973
          policy_loss: -0.002625896698898739
          total_loss: -0.00786510809428162
          vf_explained_var: 0.5248320698738098
          vf_loss: 0.008674101112410426
    num_agent_steps_sampled: 798000
    num_agent_steps_trained: 798000
    num_steps_sampled: 798000
    num_steps_trained: 798000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,798,20806.8,798000,-3.0893,-2.4,-4.78,305.98


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 799000
  custom_metrics: {}
  date: 2021-10-24_13-55-18
  done: false
  episode_len_mean: 304.91
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.0785999999999785
  episode_reward_min: -4.779999999999963
  episodes_this_iter: 3
  episodes_total: 2774
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.72710197856213e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1653160439597237
          entropy_coeff: 0.009999999999999998
          kl: 0.010752599473344482
          policy_loss: 0.016292994634972677
          total_loss: 0.011822712752554152
          vf_explained_var: 0.4535139203071594
          vf_loss: 0.00718287501949817
    num_agent_steps_sampled: 799000
    num_agent_steps_trained: 799000
    num_steps_sampled: 799000
    num_steps_trained: 799000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,799,20837.7,799000,-3.0786,-2.4,-4.78,304.91


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 800000
  custom_metrics: {}
  date: 2021-10-24_13-55-49
  done: false
  episode_len_mean: 304.24
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.0522999999999785
  episode_reward_min: -4.209999999999954
  episodes_this_iter: 3
  episodes_total: 2777
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.72710197856213e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2684908628463745
          entropy_coeff: 0.009999999999999998
          kl: 0.013620958969009797
          policy_loss: -0.11376898503965802
          total_loss: -0.11669557094573975
          vf_explained_var: 0.3422700762748718
          vf_loss: 0.009758322111641368
    num_agent_steps_sampled: 800000
    num_agent_steps_trained: 800000
    num_steps_sampled: 800000
    num_steps_trained: 800000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,800,20868.2,800000,-3.0523,-2.4,-4.21,304.24


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 801000
  custom_metrics: {}
  date: 2021-10-24_13-56-20
  done: false
  episode_len_mean: 302.62
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.0360999999999785
  episode_reward_min: -4.209999999999954
  episodes_this_iter: 4
  episodes_total: 2781
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.72710197856213e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1910893042882285
          entropy_coeff: 0.009999999999999998
          kl: 0.016626177709481264
          policy_loss: 0.01601391140785482
          total_loss: 0.016064710128638478
          vf_explained_var: 0.2664908766746521
          vf_loss: 0.01196169105047981
    num_agent_steps_sampled: 801000
    num_agent_steps_trained: 801000
    num_steps_sampled: 801000
    num_steps_trained: 801000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,801,20899.3,801000,-3.0361,-2.4,-4.21,302.62


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 802000
  custom_metrics: {}
  date: 2021-10-24_13-56-54
  done: false
  episode_len_mean: 301.97
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.029599999999979
  episode_reward_min: -4.209999999999954
  episodes_this_iter: 3
  episodes_total: 2784
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.72710197856213e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.208859501944648
          entropy_coeff: 0.009999999999999998
          kl: 0.010923391008218965
          policy_loss: 0.0588922461701764
          total_loss: 0.055699704256322646
          vf_explained_var: 0.21114476025104523
          vf_loss: 0.008896054037743144
    num_agent_steps_sampled: 802000
    num_agent_steps_trained: 802000
    num_steps_sampled: 802000
    num_steps_trained: 802000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,802,20932.9,802000,-3.0296,-2.4,-4.21,301.97


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 803000
  custom_metrics: {}
  date: 2021-10-24_13-57-26
  done: false
  episode_len_mean: 300.94
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.00939999999998
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 4
  episodes_total: 2788
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.72710197856213e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1311764816443126
          entropy_coeff: 0.009999999999999998
          kl: 0.00491776611151301
          policy_loss: -0.008846146861712138
          total_loss: -0.00651830674873458
          vf_explained_var: 0.21833781898021698
          vf_loss: 0.013639605986989206
    num_agent_steps_sampled: 803000
    num_agent_steps_trained: 803000
    num_steps_sampled: 803000
    num_steps_trained: 803000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,803,20965.8,803000,-3.0094,-2.4,-3.49,300.94




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 804000
  custom_metrics: {}
  date: 2021-10-24_13-58-17
  done: false
  episode_len_mean: 300.02
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.0001999999999804
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 3
  episodes_total: 2791
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.363550989281065e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.153990450170305
          entropy_coeff: 0.009999999999999998
          kl: 0.01070345366295192
          policy_loss: 0.02450390557448069
          total_loss: 0.022026515503724416
          vf_explained_var: 0.3939598500728607
          vf_loss: 0.009062514030503937
    num_agent_steps_sampled: 804000
    num_agent_steps_trained: 804000
    num_steps_sampled: 804000
    num_steps_trained: 804000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,804,21016.6,804000,-3.0002,-2.4,-3.49,300.02


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 805000
  custom_metrics: {}
  date: 2021-10-24_13-58-49
  done: false
  episode_len_mean: 299.34
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9933999999999794
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 3
  episodes_total: 2794
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.363550989281065e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2228176911671957
          entropy_coeff: 0.009999999999999998
          kl: 0.009657202232230589
          policy_loss: -0.10899533977111181
          total_loss: -0.1086574438545439
          vf_explained_var: 0.24696150422096252
          vf_loss: 0.012566075173930989
    num_agent_steps_sampled: 805000
    num_agent_steps_trained: 805000
    num_steps_sampled: 805000
    num_steps_trained: 80500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,805,21047.9,805000,-2.9934,-2.4,-3.49,299.34


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 806000
  custom_metrics: {}
  date: 2021-10-24_13-59-20
  done: false
  episode_len_mean: 299.33
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.993299999999979
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 4
  episodes_total: 2798
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.363550989281065e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1096689820289611
          entropy_coeff: 0.009999999999999998
          kl: 0.010678875652147665
          policy_loss: 0.0008935411771138509
          total_loss: 0.0018590984245141348
          vf_explained_var: 0.2791752815246582
          vf_loss: 0.012062248815264966
    num_agent_steps_sampled: 806000
    num_agent_steps_trained: 806000
    num_steps_sampled: 806000
    num_steps_trained: 8060

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,806,21079.1,806000,-2.9933,-2.4,-3.49,299.33


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 807000
  custom_metrics: {}
  date: 2021-10-24_13-59-53
  done: false
  episode_len_mean: 299.6
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.995999999999979
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 3
  episodes_total: 2801
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.363550989281065e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2325221644507514
          entropy_coeff: 0.009999999999999998
          kl: 0.011542643321592137
          policy_loss: 0.05329677859942118
          total_loss: 0.05174435998002688
          vf_explained_var: 0.10716021060943604
          vf_loss: 0.010772799597018295
    num_agent_steps_sampled: 807000
    num_agent_steps_trained: 807000
    num_steps_sampled: 807000
    num_steps_trained: 807000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,807,21112.1,807000,-2.996,-2.4,-3.49,299.6


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 808000
  custom_metrics: {}
  date: 2021-10-24_14-00-27
  done: false
  episode_len_mean: 299.17
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9916999999999803
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 3
  episodes_total: 2804
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.363550989281065e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9845189147525364
          entropy_coeff: 0.009999999999999998
          kl: 0.008072051989668694
          policy_loss: -0.09896096967988544
          total_loss: -0.09613819652133518
          vf_explained_var: 0.3690199553966522
          vf_loss: 0.012667959814684259
    num_agent_steps_sampled: 808000
    num_agent_steps_trained: 808000
    num_steps_sampled: 808000
    num_steps_trained: 80800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,808,21146.7,808000,-2.9917,-2.4,-3.49,299.17


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 809000
  custom_metrics: {}
  date: 2021-10-24_14-01-01
  done: false
  episode_len_mean: 298.74
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.98739999999998
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 4
  episodes_total: 2808
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.363550989281065e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.052888591421975
          entropy_coeff: 0.009999999999999998
          kl: 0.012479994669468258
          policy_loss: 0.013354011376698812
          total_loss: 0.014411403404341803
          vf_explained_var: 0.4397616386413574
          vf_loss: 0.011586278014712863
    num_agent_steps_sampled: 809000
    num_agent_steps_trained: 809000
    num_steps_sampled: 809000
    num_steps_trained: 809000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,809,21180,809000,-2.9874,-2.4,-3.49,298.74


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 810000
  custom_metrics: {}
  date: 2021-10-24_14-01-36
  done: false
  episode_len_mean: 298.5
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9849999999999794
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 3
  episodes_total: 2811
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.363550989281065e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9786860518985324
          entropy_coeff: 0.009999999999999998
          kl: 0.0065430475091485365
          policy_loss: -0.05104437039958106
          total_loss: -0.04982565575175815
          vf_explained_var: 0.39285561442375183
          vf_loss: 0.011005576461967495
    num_agent_steps_sampled: 810000
    num_agent_steps_trained: 810000
    num_steps_sampled: 810000
    num_steps_trained: 8100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,810,21214.8,810000,-2.985,-2.4,-3.49,298.5


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 811000
  custom_metrics: {}
  date: 2021-10-24_14-02-10
  done: false
  episode_len_mean: 297.43
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9742999999999813
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 4
  episodes_total: 2815
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.363550989281065e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1708005640241834
          entropy_coeff: 0.009999999999999998
          kl: 0.017178668353457344
          policy_loss: 0.031082888402872617
          total_loss: 0.030086131021380424
          vf_explained_var: 0.6185087561607361
          vf_loss: 0.010711247153166268
    num_agent_steps_sampled: 811000
    num_agent_steps_trained: 811000
    num_steps_sampled: 811000
    num_steps_trained: 81100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,811,21249.6,811000,-2.9743,-2.4,-3.49,297.43


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 812000
  custom_metrics: {}
  date: 2021-10-24_14-02-44
  done: false
  episode_len_mean: 296.22
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.96219999999998
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 3
  episodes_total: 2818
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.363550989281065e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0038144965966542
          entropy_coeff: 0.009999999999999998
          kl: 0.003062113045811543
          policy_loss: 0.04050092101097107
          total_loss: 0.03776545971632004
          vf_explained_var: 0.6371682286262512
          vf_loss: 0.007302683809151252
    num_agent_steps_sampled: 812000
    num_agent_steps_trained: 812000
    num_steps_sampled: 812000
    num_steps_trained: 812000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,812,21283.3,812000,-2.9622,-2.4,-3.49,296.22




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 813000
  custom_metrics: {}
  date: 2021-10-24_14-03-36
  done: false
  episode_len_mean: 295.05
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9504999999999812
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 4
  episodes_total: 2822
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.817754946405326e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.0428522573577033
          entropy_coeff: 0.009999999999999998
          kl: 0.011268550048267024
          policy_loss: 0.014833258920245701
          total_loss: 0.014936356287863519
          vf_explained_var: 0.5241327881813049
          vf_loss: 0.01053162167987062
    num_agent_steps_sampled: 813000
    num_agent_steps_trained: 813000
    num_steps_sampled: 813000
    num_steps_trained: 813000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,813,21335,813000,-2.9505,-2.4,-3.49,295.05


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 814000
  custom_metrics: {}
  date: 2021-10-24_14-04-08
  done: false
  episode_len_mean: 294.46
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9445999999999812
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 3
  episodes_total: 2825
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.817754946405326e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9681215544541677
          entropy_coeff: 0.009999999999999998
          kl: 0.03252030164885369
          policy_loss: -0.03566189722882377
          total_loss: -0.03687481962972217
          vf_explained_var: 0.5125224590301514
          vf_loss: 0.008468290928026869
    num_agent_steps_sampled: 814000
    num_agent_steps_trained: 814000
    num_steps_sampled: 814000
    num_steps_trained: 814000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,814,21366.7,814000,-2.9446,-2.4,-3.49,294.46


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 815000
  custom_metrics: {}
  date: 2021-10-24_14-04-38
  done: false
  episode_len_mean: 294.22
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9421999999999815
  episode_reward_min: -3.4899999999999696
  episodes_this_iter: 3
  episodes_total: 2828
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0226632419607988e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0038778457376691
          entropy_coeff: 0.009999999999999998
          kl: 0.01171707762750521
          policy_loss: -0.15488775024811427
          total_loss: -0.15575557781590355
          vf_explained_var: 0.5534805059432983
          vf_loss: 0.009170946266709102
    num_agent_steps_sampled: 815000
    num_agent_steps_trained: 815000
    num_steps_sampled: 815000
    num_steps_trained: 81500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,815,21397,815000,-2.9422,-2.4,-3.49,294.22


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 816000
  custom_metrics: {}
  date: 2021-10-24_14-05-07
  done: false
  episode_len_mean: 295.29
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.952899999999981
  episode_reward_min: -3.709999999999965
  episodes_this_iter: 3
  episodes_total: 2831
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0226632419607988e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9738003008895451
          entropy_coeff: 0.009999999999999998
          kl: 0.017713029461234934
          policy_loss: -0.14627193162838617
          total_loss: -0.145665093511343
          vf_explained_var: 0.39105525612831116
          vf_loss: 0.010344839158157508
    num_agent_steps_sampled: 816000
    num_agent_steps_trained: 816000
    num_steps_sampled: 816000
    num_steps_trained: 816000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,816,21425.7,816000,-2.9529,-2.4,-3.71,295.29


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 817000
  custom_metrics: {}
  date: 2021-10-24_14-05-37
  done: false
  episode_len_mean: 295.05
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9504999999999812
  episode_reward_min: -3.709999999999965
  episodes_this_iter: 4
  episodes_total: 2835
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0226632419607988e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0779797004328833
          entropy_coeff: 0.009999999999999998
          kl: 0.011409633876348834
          policy_loss: 0.009401664675937758
          total_loss: 0.008972332916325994
          vf_explained_var: 0.3058617413043976
          vf_loss: 0.010350465971148677
    num_agent_steps_sampled: 817000
    num_agent_steps_trained: 817000
    num_steps_sampled: 817000
    num_steps_trained: 81700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,817,21455.7,817000,-2.9505,-2.4,-3.71,295.05


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 818000
  custom_metrics: {}
  date: 2021-10-24_14-06-04
  done: false
  episode_len_mean: 295.45
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9544999999999813
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 2
  episodes_total: 2837
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0226632419607988e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.087579039732615
          entropy_coeff: 0.009999999999999998
          kl: 0.012969416508182185
          policy_loss: -0.10681812034712898
          total_loss: -0.10922404593891567
          vf_explained_var: 0.33674538135528564
          vf_loss: 0.008469861982545505
    num_agent_steps_sampled: 818000
    num_agent_steps_trained: 818000
    num_steps_sampled: 818000
    num_steps_trained: 8180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,818,21483.2,818000,-2.9545,-2.4,-3.97,295.45


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 819000
  custom_metrics: {}
  date: 2021-10-24_14-06-33
  done: false
  episode_len_mean: 296.58
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9657999999999816
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2840
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0226632419607988e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.036473337146971
          entropy_coeff: 0.009999999999999998
          kl: 0.01141399358441725
          policy_loss: -0.10668763750129276
          total_loss: -0.10773021992709901
          vf_explained_var: 0.30629056692123413
          vf_loss: 0.009322152928345734
    num_agent_steps_sampled: 819000
    num_agent_steps_trained: 819000
    num_steps_sampled: 819000
    num_steps_trained: 81900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,819,21511.6,819000,-2.9658,-2.4,-3.97,296.58


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 820000
  custom_metrics: {}
  date: 2021-10-24_14-07-02
  done: false
  episode_len_mean: 298.15
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.98149999999998
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 4
  episodes_total: 2844
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0226632419607988e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0723846521642473
          entropy_coeff: 0.009999999999999998
          kl: 0.034839250128132764
          policy_loss: 0.010217192355129454
          total_loss: 0.00981936508582698
          vf_explained_var: 0.38548609614372253
          vf_loss: 0.010326017460061444
    num_agent_steps_sampled: 820000
    num_agent_steps_trained: 820000
    num_steps_sampled: 820000
    num_steps_trained: 820000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,820,21541.3,820000,-2.9815,-2.4,-3.97,298.15


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 821000
  custom_metrics: {}
  date: 2021-10-24_14-07-33
  done: false
  episode_len_mean: 298.49
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9848999999999806
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2847
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5339948629411983e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0535448332627615
          entropy_coeff: 0.009999999999999998
          kl: 0.009203924984630005
          policy_loss: 0.05390461170011097
          total_loss: 0.05206727700101005
          vf_explained_var: 0.4450747072696686
          vf_loss: 0.008698114201736946
    num_agent_steps_sampled: 821000
    num_agent_steps_trained: 821000
    num_steps_sampled: 821000
    num_steps_trained: 821000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,821,21571.8,821000,-2.9849,-2.4,-3.97,298.49




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 822000
  custom_metrics: {}
  date: 2021-10-24_14-08-21
  done: false
  episode_len_mean: 298.52
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.9851999999999803
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2850
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5339948629411983e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0312550054656136
          entropy_coeff: 0.009999999999999998
          kl: 0.010688157765615674
          policy_loss: -0.08224083011349043
          total_loss: -0.08057408514950011
          vf_explained_var: 0.22652968764305115
          vf_loss: 0.011979296327465111
    num_agent_steps_sampled: 822000
    num_agent_steps_trained: 822000
    num_steps_sampled: 822000
    num_steps_trained: 822

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,822,21619.6,822000,-2.9852,-2.4,-3.97,298.52


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 823000
  custom_metrics: {}
  date: 2021-10-24_14-08-55
  done: false
  episode_len_mean: 298.26
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.98259999999998
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 4
  episodes_total: 2854
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5339948629411983e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9941312465402815
          entropy_coeff: 0.009999999999999998
          kl: 0.0187864780915928
          policy_loss: 0.009696690117319424
          total_loss: 0.011182642065816456
          vf_explained_var: 0.29774999618530273
          vf_loss: 0.011427263584401872
    num_agent_steps_sampled: 823000
    num_agent_steps_trained: 823000
    num_steps_sampled: 823000
    num_steps_trained: 823000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,823,21654.1,823000,-2.9826,-2.4,-3.97,298.26


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 824000
  custom_metrics: {}
  date: 2021-10-24_14-09-27
  done: false
  episode_len_mean: 298.29
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.98289999999998
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2857
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5339948629411983e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.011530602640576
          entropy_coeff: 0.009999999999999998
          kl: 0.006254589033282403
          policy_loss: -0.11781252788172827
          total_loss: -0.11539139101902644
          vf_explained_var: 0.1494928002357483
          vf_loss: 0.01253644216598736
    num_agent_steps_sampled: 824000
    num_agent_steps_trained: 824000
    num_steps_sampled: 824000
    num_steps_trained: 824000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,824,21686.1,824000,-2.9829,-2.4,-3.97,298.29


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 825000
  custom_metrics: {}
  date: 2021-10-24_14-10-00
  done: false
  episode_len_mean: 298.8
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.98799999999998
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 4
  episodes_total: 2861
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5339948629411983e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1953377710448372
          entropy_coeff: 0.009999999999999998
          kl: 0.01227228631481836
          policy_loss: 0.029447115378247367
          total_loss: 0.028085809739099608
          vf_explained_var: 0.4302710294723511
          vf_loss: 0.010592068814569049
    num_agent_steps_sampled: 825000
    num_agent_steps_trained: 825000
    num_steps_sampled: 825000
    num_steps_trained: 825000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,825,21718.6,825000,-2.988,-2.4,-3.97,298.8


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 826000
  custom_metrics: {}
  date: 2021-10-24_14-10-30
  done: false
  episode_len_mean: 299.41
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -2.99409999999998
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2864
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5339948629411983e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2017487261030408
          entropy_coeff: 0.009999999999999998
          kl: 0.044025128815993894
          policy_loss: 0.05104037490155962
          total_loss: 0.047433480620384216
          vf_explained_var: 0.3574709892272949
          vf_loss: 0.008410587126854807
    num_agent_steps_sampled: 826000
    num_agent_steps_trained: 826000
    num_steps_sampled: 826000
    num_steps_trained: 826000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,826,21749.2,826000,-2.9941,-2.4,-3.97,299.41


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 827000
  custom_metrics: {}
  date: 2021-10-24_14-11-01
  done: false
  episode_len_mean: 300.16
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.0015999999999803
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2867
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3009922944117966e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2731255385610791
          entropy_coeff: 0.009999999999999998
          kl: 0.027515283716720754
          policy_loss: 0.023457959211534925
          total_loss: 0.020047931869824728
          vf_explained_var: 0.20763395726680756
          vf_loss: 0.009321224715353713
    num_agent_steps_sampled: 827000
    num_agent_steps_trained: 827000
    num_steps_sampled: 827000
    num_steps_trained: 827

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,827,21779.4,827000,-3.0016,-2.4,-3.97,300.16


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 828000
  custom_metrics: {}
  date: 2021-10-24_14-11-30
  done: false
  episode_len_mean: 301.16
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.0115999999999796
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2870
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4514884416176967e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2714800715446473
          entropy_coeff: 0.009999999999999998
          kl: 0.01490237103284097
          policy_loss: -0.10376206222507689
          total_loss: -0.1044488449063566
          vf_explained_var: 0.28051331639289856
          vf_loss: 0.01202801608790954
    num_agent_steps_sampled: 828000
    num_agent_steps_trained: 828000
    num_steps_sampled: 828000
    num_steps_trained: 828000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,828,21808.5,828000,-3.0116,-2.4,-3.97,301.16


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 829000
  custom_metrics: {}
  date: 2021-10-24_14-12-00
  done: false
  episode_len_mean: 302.06
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.020599999999979
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2873
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4514884416176967e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.205049455165863
          entropy_coeff: 0.009999999999999998
          kl: 0.00643809125421334
          policy_loss: -0.0934809788233704
          total_loss: -0.09432101473212243
          vf_explained_var: 0.2790212333202362
          vf_loss: 0.01121045420360234
    num_agent_steps_sampled: 829000
    num_agent_steps_trained: 829000
    num_steps_sampled: 829000
    num_steps_trained: 829000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,829,21839,829000,-3.0206,-2.4,-3.97,302.06


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 830000
  custom_metrics: {}
  date: 2021-10-24_14-12-31
  done: false
  episode_len_mean: 302.21
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.0220999999999787
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 4
  episodes_total: 2877
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4514884416176967e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2258878416485257
          entropy_coeff: 0.009999999999999998
          kl: 0.013309316934092496
          policy_loss: 0.018760150174299876
          total_loss: 0.019010857244332633
          vf_explained_var: 0.27578049898147583
          vf_loss: 0.012509587282935778
    num_agent_steps_sampled: 830000
    num_agent_steps_trained: 830000
    num_steps_sampled: 830000
    num_steps_trained: 830

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,830,21869.9,830000,-3.0221,-2.4,-3.97,302.21




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 831000
  custom_metrics: {}
  date: 2021-10-24_14-13-18
  done: false
  episode_len_mean: 301.63
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.016299999999979
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2880
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4514884416176967e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2501302149560716
          entropy_coeff: 0.009999999999999998
          kl: 0.01167123585715078
          policy_loss: -0.03231607046392229
          total_loss: -0.03498509600758552
          vf_explained_var: 0.2520853579044342
          vf_loss: 0.009832274944831928
    num_agent_steps_sampled: 831000
    num_agent_steps_trained: 831000
    num_steps_sampled: 831000
    num_steps_trained: 831000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,831,21916.4,831000,-3.0163,-2.4,-3.97,301.63


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 832000
  custom_metrics: {}
  date: 2021-10-24_14-13-52
  done: false
  episode_len_mean: 301.6
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.015999999999979
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 4
  episodes_total: 2884
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4514884416176967e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1407834092775981
          entropy_coeff: 0.009999999999999998
          kl: 0.027724158549435333
          policy_loss: -0.03808104279968474
          total_loss: -0.037787234596908095
          vf_explained_var: 0.4388044476509094
          vf_loss: 0.011701636502726211
    num_agent_steps_sampled: 832000
    num_agent_steps_trained: 832000
    num_steps_sampled: 832000
    num_steps_trained: 83200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,832,21951.1,832000,-3.016,-2.4,-3.97,301.6


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 833000
  custom_metrics: {}
  date: 2021-10-24_14-14-25
  done: false
  episode_len_mean: 301.78
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.01779999999998
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2887
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2018525388505723
          entropy_coeff: 0.009999999999999998
          kl: 0.009995695521802933
          policy_loss: 0.06148504209187296
          total_loss: 0.058541967802577546
          vf_explained_var: 0.45133349299430847
          vf_loss: 0.009075449332724222
    num_agent_steps_sampled: 833000
    num_agent_steps_trained: 833000
    num_steps_sampled: 833000
    num_steps_trained: 833000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,833,21983.5,833000,-3.0178,-2.4,-3.97,301.78


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 834000
  custom_metrics: {}
  date: 2021-10-24_14-14-56
  done: false
  episode_len_mean: 302.87
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.028699999999979
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2890
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1658782323201498
          entropy_coeff: 0.009999999999999998
          kl: 0.010528033397565018
          policy_loss: 0.04220283553004265
          total_loss: 0.03960819376839532
          vf_explained_var: 0.3525668680667877
          vf_loss: 0.009064138686517254
    num_agent_steps_sampled: 834000
    num_agent_steps_trained: 834000
    num_steps_sampled: 834000
    num_steps_trained: 834000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,834,22014.3,834000,-3.0287,-2.4,-3.97,302.87


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 835000
  custom_metrics: {}
  date: 2021-10-24_14-15-26
  done: false
  episode_len_mean: 303.47
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.0346999999999786
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2893
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2227878385119968
          entropy_coeff: 0.009999999999999998
          kl: 0.013570538194273615
          policy_loss: -0.03466403020752801
          total_loss: -0.036336799959341685
          vf_explained_var: 0.14859938621520996
          vf_loss: 0.010555107958821787
    num_agent_steps_sampled: 835000
    num_agent_steps_trained: 835000
    num_steps_sampled: 835000
    num_steps_trained: 835

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,835,22044.4,835000,-3.0347,-2.4,-3.97,303.47


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 836000
  custom_metrics: {}
  date: 2021-10-24_14-15-58
  done: false
  episode_len_mean: 303.81
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.038099999999979
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2896
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1470203147994147
          entropy_coeff: 0.009999999999999998
          kl: 0.009074984693783409
          policy_loss: -0.09748465857572025
          total_loss: -0.09627792040506998
          vf_explained_var: 0.18601594865322113
          vf_loss: 0.012676936098270947
    num_agent_steps_sampled: 836000
    num_agent_steps_trained: 836000
    num_steps_sampled: 836000
    num_steps_trained: 83600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,836,22077,836000,-3.0381,-2.4,-3.97,303.81


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 837000
  custom_metrics: {}
  date: 2021-10-24_14-16-30
  done: false
  episode_len_mean: 303.9
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.038999999999979
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 4
  episodes_total: 2900
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1035032682948642
          entropy_coeff: 0.009999999999999998
          kl: 0.010777756195532914
          policy_loss: 0.01958028193977144
          total_loss: 0.02142199402054151
          vf_explained_var: 0.22594352066516876
          vf_loss: 0.01287674384398593
    num_agent_steps_sampled: 837000
    num_agent_steps_trained: 837000
    num_steps_sampled: 837000
    num_steps_trained: 837000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,837,22108.5,837000,-3.039,-2.4,-3.97,303.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 838000
  custom_metrics: {}
  date: 2021-10-24_14-17-03
  done: false
  episode_len_mean: 303.7
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.036999999999979
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2903
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1222192247708638
          entropy_coeff: 0.009999999999999998
          kl: 0.005866464051716886
          policy_loss: 0.03642935703198115
          total_loss: 0.03468958934148152
          vf_explained_var: 0.3281368911266327
          vf_loss: 0.009482426528120414
    num_agent_steps_sampled: 838000
    num_agent_steps_trained: 838000
    num_steps_sampled: 838000
    num_steps_trained: 838000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,838,22141.1,838000,-3.037,-2.4,-3.97,303.7


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 839000
  custom_metrics: {}
  date: 2021-10-24_14-17-34
  done: false
  episode_len_mean: 303.47
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.034699999999979
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 4
  episodes_total: 2907
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.092124303181966
          entropy_coeff: 0.009999999999999998
          kl: 0.01080919698675255
          policy_loss: 0.013004951261811786
          total_loss: 0.01549281585547659
          vf_explained_var: 0.24725140631198883
          vf_loss: 0.013409106836964687
    num_agent_steps_sampled: 839000
    num_agent_steps_trained: 839000
    num_steps_sampled: 839000
    num_steps_trained: 839000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,839,22172.3,839000,-3.0347,-2.4,-3.97,303.47




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 840000
  custom_metrics: {}
  date: 2021-10-24_14-18-25
  done: false
  episode_len_mean: 303.31
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.033099999999979
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2910
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1016767263412475
          entropy_coeff: 0.009999999999999998
          kl: 0.013763868220652418
          policy_loss: -0.012457702888382806
          total_loss: -0.013582099808586968
          vf_explained_var: 0.33454012870788574
          vf_loss: 0.009892368482542224
    num_agent_steps_sampled: 840000
    num_agent_steps_trained: 840000
    num_steps_sampled: 840000
    num_steps_trained: 840

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,840,22223.3,840000,-3.0331,-2.4,-3.97,303.31


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 841000
  custom_metrics: {}
  date: 2021-10-24_14-18-57
  done: false
  episode_len_mean: 303.61
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.036099999999979
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 4
  episodes_total: 2914
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.138350952996148
          entropy_coeff: 0.009999999999999998
          kl: 0.015945276634026898
          policy_loss: -0.01278728089398808
          total_loss: -0.01148896167675654
          vf_explained_var: 0.19995729625225067
          vf_loss: 0.012681826586938567
    num_agent_steps_sampled: 841000
    num_agent_steps_trained: 841000
    num_steps_sampled: 841000
    num_steps_trained: 841000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,841,22255.7,841000,-3.0361,-2.4,-3.97,303.61


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 842000
  custom_metrics: {}
  date: 2021-10-24_14-19-29
  done: false
  episode_len_mean: 303.94
  episode_media: {}
  episode_reward_max: -2.399999999999993
  episode_reward_mean: -3.039399999999979
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2917
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1423868629667493
          entropy_coeff: 0.009999999999999998
          kl: 0.0077906608500774395
          policy_loss: 0.049740736931562425
          total_loss: 0.048802559326092405
          vf_explained_var: 0.2746739387512207
          vf_loss: 0.010485686902474199
    num_agent_steps_sampled: 842000
    num_agent_steps_trained: 842000
    num_steps_sampled: 842000
    num_steps_trained: 84200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,842,22287.4,842000,-3.0394,-2.4,-3.97,303.94


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 843000
  custom_metrics: {}
  date: 2021-10-24_14-20-02
  done: false
  episode_len_mean: 304.48
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.0447999999999786
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2920
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1651702801386514
          entropy_coeff: 0.009999999999999998
          kl: 0.009569742209233172
          policy_loss: -0.05310171677006616
          total_loss: -0.051697463128301833
          vf_explained_var: 0.15917806327342987
          vf_loss: 0.013055953942239285
    num_agent_steps_sampled: 843000
    num_agent_steps_trained: 843000
    num_steps_sampled: 843000
    num_steps_trained: 843

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,843,22320.3,843000,-3.0448,-2.57,-3.97,304.48


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 844000
  custom_metrics: {}
  date: 2021-10-24_14-20-34
  done: false
  episode_len_mean: 304.38
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.0437999999999783
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 4
  episodes_total: 2924
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.16817225350274
          entropy_coeff: 0.009999999999999998
          kl: 0.013965246384159337
          policy_loss: 0.010567739026414023
          total_loss: 0.013095755378405254
          vf_explained_var: 0.25640198588371277
          vf_loss: 0.01420973568326897
    num_agent_steps_sampled: 844000
    num_agent_steps_trained: 844000
    num_steps_sampled: 844000
    num_steps_trained: 844000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,844,22352.8,844000,-3.0438,-2.57,-3.97,304.38


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 845000
  custom_metrics: {}
  date: 2021-10-24_14-21-07
  done: false
  episode_len_mean: 304.31
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.043099999999979
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2927
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1640701002544827
          entropy_coeff: 0.009999999999999998
          kl: 0.009835497064820705
          policy_loss: 0.03593531482749515
          total_loss: 0.03422314309411579
          vf_explained_var: 0.23579257726669312
          vf_loss: 0.009928527528730531
    num_agent_steps_sampled: 845000
    num_agent_steps_trained: 845000
    num_steps_sampled: 845000
    num_steps_trained: 845000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,845,22385.2,845000,-3.0431,-2.57,-3.97,304.31


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 846000
  custom_metrics: {}
  date: 2021-10-24_14-21-37
  done: false
  episode_len_mean: 303.8
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.037999999999979
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 3
  episodes_total: 2930
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1724518524275886
          entropy_coeff: 0.009999999999999998
          kl: 0.0128826537696768
          policy_loss: -0.11851653647091653
          total_loss: -0.11631747335195541
          vf_explained_var: 0.20289120078086853
          vf_loss: 0.013923579526858198
    num_agent_steps_sampled: 846000
    num_agent_steps_trained: 846000
    num_steps_sampled: 846000
    num_steps_trained: 846000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,846,22415.7,846000,-3.038,-2.57,-3.97,303.8


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 847000
  custom_metrics: {}
  date: 2021-10-24_14-22-10
  done: false
  episode_len_mean: 302.39
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.0238999999999794
  episode_reward_min: -3.9699999999999593
  episodes_this_iter: 4
  episodes_total: 2934
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2223837494850158
          entropy_coeff: 0.009999999999999998
          kl: 0.009110982215029395
          policy_loss: 0.04296711766057544
          total_loss: 0.04337434619665146
          vf_explained_var: 0.19404661655426025
          vf_loss: 0.012631061160936952
    num_agent_steps_sampled: 847000
    num_agent_steps_trained: 847000
    num_steps_sampled: 847000
    num_steps_trained: 847000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,847,22447.9,847000,-3.0239,-2.57,-3.97,302.39


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 848000
  custom_metrics: {}
  date: 2021-10-24_14-22-42
  done: false
  episode_len_mean: 301.37
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.013699999999979
  episode_reward_min: -3.6799999999999655
  episodes_this_iter: 3
  episodes_total: 2937
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2693618986341688
          entropy_coeff: 0.009999999999999998
          kl: 0.011536464919163257
          policy_loss: -0.013530953311257893
          total_loss: -0.01501491086350547
          vf_explained_var: 0.1903633326292038
          vf_loss: 0.011209661955945194
    num_agent_steps_sampled: 848000
    num_agent_steps_trained: 848000
    num_steps_sampled: 848000
    num_steps_trained: 84800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,848,22480.6,848000,-3.0137,-2.57,-3.68,301.37




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 849000
  custom_metrics: {}
  date: 2021-10-24_14-23-31
  done: false
  episode_len_mean: 300.42
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.0041999999999796
  episode_reward_min: -3.6799999999999655
  episodes_this_iter: 3
  episodes_total: 2940
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2056357476446364
          entropy_coeff: 0.009999999999999998
          kl: 0.00741547029336809
          policy_loss: -0.10476919180817074
          total_loss: -0.10479369991355472
          vf_explained_var: 0.210309237241745
          vf_loss: 0.012031847181626492
    num_agent_steps_sampled: 849000
    num_agent_steps_trained: 849000
    num_steps_sampled: 849000
    num_steps_trained: 849000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,849,22529.2,849000,-3.0042,-2.57,-3.68,300.42


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 850000
  custom_metrics: {}
  date: 2021-10-24_14-24-00
  done: false
  episode_len_mean: 300.41
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.0040999999999793
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 3
  episodes_total: 2943
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.146942060523563
          entropy_coeff: 0.009999999999999998
          kl: 0.014927168507668754
          policy_loss: -0.12197377731402716
          total_loss: -0.11831879996591144
          vf_explained_var: 0.18100637197494507
          vf_loss: 0.01512439777660701
    num_agent_steps_sampled: 850000
    num_agent_steps_trained: 850000
    num_steps_sampled: 850000
    num_steps_trained: 850000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,850,22558.4,850000,-3.0041,-2.57,-3.4,300.41


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 851000
  custom_metrics: {}
  date: 2021-10-24_14-24-30
  done: false
  episode_len_mean: 300.96
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.009599999999979
  episode_reward_min: -3.3999999999999715
  episodes_this_iter: 4
  episodes_total: 2947
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0724093033207787
          entropy_coeff: 0.009999999999999998
          kl: 0.006942626478449417
          policy_loss: 0.008585747828086217
          total_loss: 0.012394519067472881
          vf_explained_var: 0.17548911273479462
          vf_loss: 0.014532861279116736
    num_agent_steps_sampled: 851000
    num_agent_steps_trained: 851000
    num_steps_sampled: 851000
    num_steps_trained: 85100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,851,22588.3,851000,-3.0096,-2.57,-3.4,300.96


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 852000
  custom_metrics: {}
  date: 2021-10-24_14-24-59
  done: false
  episode_len_mean: 301.78
  episode_media: {}
  episode_reward_max: -2.569999999999989
  episode_reward_mean: -3.0177999999999794
  episode_reward_min: -3.5299999999999687
  episodes_this_iter: 2
  episodes_total: 2949
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0770132939020793
          entropy_coeff: 0.009999999999999998
          kl: 0.011240585750015776
          policy_loss: -0.1095571239789327
          total_loss: -0.10960904955863952
          vf_explained_var: 0.2674984335899353
          vf_loss: 0.010718211795837205
    num_agent_steps_sampled: 852000
    num_agent_steps_trained: 852000
    num_steps_sampled: 852000
    num_steps_trained: 852000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,852,22617,852000,-3.0178,-2.57,-3.53,301.78


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 853000
  custom_metrics: {}
  date: 2021-10-24_14-25-28
  done: false
  episode_len_mean: 303.53
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.0352999999999795
  episode_reward_min: -3.5299999999999687
  episodes_this_iter: 3
  episodes_total: 2952
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0293932769033645
          entropy_coeff: 0.009999999999999998
          kl: 0.006824065120119372
          policy_loss: -0.10629443559381697
          total_loss: -0.10372963945070902
          vf_explained_var: 0.15109558403491974
          vf_loss: 0.01285872686550849
    num_agent_steps_sampled: 853000
    num_agent_steps_trained: 853000
    num_steps_sampled: 853000
    num_steps_trained: 8530

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,853,22645.8,853000,-3.0353,-2.61,-3.53,303.53


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 854000
  custom_metrics: {}
  date: 2021-10-24_14-25-55
  done: false
  episode_len_mean: 305.02
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.050199999999979
  episode_reward_min: -3.5299999999999687
  episodes_this_iter: 4
  episodes_total: 2956
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9819774581326379
          entropy_coeff: 0.009999999999999998
          kl: 0.009387056393524535
          policy_loss: 0.006314543262124062
          total_loss: 0.011875573048988978
          vf_explained_var: 0.12430036067962646
          vf_loss: 0.015380801984833346
    num_agent_steps_sampled: 854000
    num_agent_steps_trained: 854000
    num_steps_sampled: 854000
    num_steps_trained: 8540

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,854,22673.4,854000,-3.0502,-2.61,-3.53,305.02


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 855000
  custom_metrics: {}
  date: 2021-10-24_14-26-25
  done: false
  episode_len_mean: 305.54
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.055399999999979
  episode_reward_min: -3.5299999999999687
  episodes_this_iter: 3
  episodes_total: 2959
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.177232662426543e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9699491004149119
          entropy_coeff: 0.009999999999999998
          kl: 0.004126810902046824
          policy_loss: 0.0936762112710211
          total_loss: 0.09167253606849246
          vf_explained_var: 0.22939088940620422
          vf_loss: 0.007695812798273336
    num_agent_steps_sampled: 855000
    num_agent_steps_trained: 855000
    num_steps_sampled: 855000
    num_steps_trained: 855000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,855,22703.2,855000,-3.0554,-2.61,-3.53,305.54


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 856000
  custom_metrics: {}
  date: 2021-10-24_14-26-57
  done: false
  episode_len_mean: 306.17
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.0616999999999788
  episode_reward_min: -3.5299999999999687
  episodes_this_iter: 3
  episodes_total: 2962
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5886163312132717e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8871711572011312
          entropy_coeff: 0.009999999999999998
          kl: 0.008444065228292965
          policy_loss: 0.05263581110371484
          total_loss: 0.05556047757466634
          vf_explained_var: 0.10237066447734833
          vf_loss: 0.011796380879564418
    num_agent_steps_sampled: 856000
    num_agent_steps_trained: 856000
    num_steps_sampled: 856000
    num_steps_trained: 8560

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,856,22735.5,856000,-3.0617,-2.61,-3.53,306.17


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 857000
  custom_metrics: {}
  date: 2021-10-24_14-27-29
  done: false
  episode_len_mean: 306.08
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.0607999999999786
  episode_reward_min: -3.5299999999999687
  episodes_this_iter: 3
  episodes_total: 2965
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5886163312132717e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8562553736898634
          entropy_coeff: 0.009999999999999998
          kl: 0.007229153295681377
          policy_loss: -0.09635603725910187
          total_loss: -0.08831552995575799
          vf_explained_var: 0.10808359086513519
          vf_loss: 0.016603057976398203
    num_agent_steps_sampled: 857000
    num_agent_steps_trained: 857000
    num_steps_sampled: 857000
    num_steps_trained: 85

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,857,22766.9,857000,-3.0608,-2.61,-3.53,306.08


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 858000
  custom_metrics: {}
  date: 2021-10-24_14-28-00
  done: false
  episode_len_mean: 305.18
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.051799999999979
  episode_reward_min: -3.5299999999999687
  episodes_this_iter: 4
  episodes_total: 2969
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5886163312132717e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8017030696074168
          entropy_coeff: 0.009999999999999998
          kl: 0.019338059520226065
          policy_loss: -0.0076943774190213945
          total_loss: -0.0005509047044648064
          vf_explained_var: 0.19855526089668274
          vf_loss: 0.015160501479274696
    num_agent_steps_sampled: 858000
    num_agent_steps_trained: 858000
    num_steps_sampled: 858000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,858,22798.2,858000,-3.0518,-2.61,-3.53,305.18




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 859000
  custom_metrics: {}
  date: 2021-10-24_14-28-48
  done: false
  episode_len_mean: 304.92
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.049199999999979
  episode_reward_min: -3.5299999999999687
  episodes_this_iter: 3
  episodes_total: 2972
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5886163312132717e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9461044775115119
          entropy_coeff: 0.009999999999999998
          kl: 0.022606605580128857
          policy_loss: 0.05365470167663362
          total_loss: 0.0549973103735182
          vf_explained_var: 0.4205946624279022
          vf_loss: 0.010803648998909112
    num_agent_steps_sampled: 859000
    num_agent_steps_trained: 859000
    num_steps_sampled: 859000
    num_steps_trained: 859000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,859,22846.3,859000,-3.0492,-2.61,-3.53,304.92


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 860000
  custom_metrics: {}
  date: 2021-10-24_14-29-17
  done: false
  episode_len_mean: 306.35
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.0634999999999786
  episode_reward_min: -4.1699999999999555
  episodes_this_iter: 3
  episodes_total: 2975
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.8829244968199073e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0360566371017033
          entropy_coeff: 0.009999999999999998
          kl: 0.023801833123099116
          policy_loss: 0.09900799385375447
          total_loss: 0.09902795826395352
          vf_explained_var: 0.5287227630615234
          vf_loss: 0.010380523776014646
    num_agent_steps_sampled: 860000
    num_agent_steps_trained: 860000
    num_steps_sampled: 860000
    num_steps_trained: 86000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,860,22874.8,860000,-3.0635,-2.61,-4.17,306.35


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 861000
  custom_metrics: {}
  date: 2021-10-24_14-29-43
  done: false
  episode_len_mean: 308.51
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.0850999999999784
  episode_reward_min: -4.6399999999999455
  episodes_this_iter: 2
  episodes_total: 2977
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.824386745229861e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3976129035154978
          entropy_coeff: 0.009999999999999998
          kl: 0.023207801429845965
          policy_loss: -0.028022880189948612
          total_loss: -0.03185253027412627
          vf_explained_var: 0.3988505005836487
          vf_loss: 0.010146467722693665
    num_agent_steps_sampled: 861000
    num_agent_steps_trained: 861000
    num_steps_sampled: 861000
    num_steps_trained: 861

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,861,22900.8,861000,-3.0851,-2.61,-4.64,308.51


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 862000
  custom_metrics: {}
  date: 2021-10-24_14-30-09
  done: false
  episode_len_mean: 311.64
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.116399999999977
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 2
  episodes_total: 2979
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.736580117844795e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.6200096395280625
          entropy_coeff: 0.009999999999999998
          kl: 0.014552724616912174
          policy_loss: -0.05820018384191725
          total_loss: -0.058822150197294025
          vf_explained_var: -0.2850101590156555
          vf_loss: 0.015578125562751666
    num_agent_steps_sampled: 862000
    num_agent_steps_trained: 862000
    num_steps_sampled: 862000
    num_steps_trained: 8620

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,862,22927.4,862000,-3.1164,-2.61,-4.75,311.64


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 863000
  custom_metrics: {}
  date: 2021-10-24_14-30-37
  done: false
  episode_len_mean: 314.57
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.145199999999976
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 2982
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.736580117844795e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.5306568834516736
          entropy_coeff: 0.009999999999999998
          kl: 0.02349711643280218
          policy_loss: 0.03368128774066766
          total_loss: 0.035247601278954085
          vf_explained_var: 0.031316984444856644
          vf_loss: 0.016872862672122815
    num_agent_steps_sampled: 863000
    num_agent_steps_trained: 863000
    num_steps_sampled: 863000
    num_steps_trained: 863000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,863,22954.7,863000,-3.1452,-2.61,-4.75,314.57


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 864000
  custom_metrics: {}
  date: 2021-10-24_14-31-03
  done: false
  episode_len_mean: 317.85
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.155999999999976
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 2
  episodes_total: 2984
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3104870176767186e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.524468794133928
          entropy_coeff: 0.009999999999999998
          kl: 0.029282041405867914
          policy_loss: -0.07322262575229009
          total_loss: 0.11300045458806886
          vf_explained_var: 0.17259731888771057
          vf_loss: 0.20146773163643147
    num_agent_steps_sampled: 864000
    num_agent_steps_trained: 864000
    num_steps_sampled: 864000
    num_steps_trained: 864000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,864,22980.7,864000,-3.156,-2.32,-4.75,317.85


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 865000
  custom_metrics: {}
  date: 2021-10-24_14-31-33
  done: false
  episode_len_mean: 320.03
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.1777999999999764
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 2987
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9657305265150787e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1178966290420955
          entropy_coeff: 0.009999999999999998
          kl: 0.01901800104753559
          policy_loss: 0.008658425716890229
          total_loss: 0.015143335941765044
          vf_explained_var: 0.5019263029098511
          vf_loss: 0.017663839945776597
    num_agent_steps_sampled: 865000
    num_agent_steps_trained: 865000
    num_steps_sampled: 865000
    num_steps_trained: 86500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,865,23011.3,865000,-3.1778,-2.32,-4.75,320.03


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 866000
  custom_metrics: {}
  date: 2021-10-24_14-32-05
  done: false
  episode_len_mean: 320.52
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.182699999999976
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 2990
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9657305265150787e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9690940810574425
          entropy_coeff: 0.009999999999999998
          kl: 0.011258081924870014
          policy_loss: 0.05582780759367678
          total_loss: 0.055695661819643444
          vf_explained_var: 0.606826901435852
          vf_loss: 0.009558777332616349
    num_agent_steps_sampled: 866000
    num_agent_steps_trained: 866000
    num_steps_sampled: 866000
    num_steps_trained: 866000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,866,23042.8,866000,-3.1827,-2.32,-4.75,320.52


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 867000
  custom_metrics: {}
  date: 2021-10-24_14-32-36
  done: false
  episode_len_mean: 321.01
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.187599999999975
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 2993
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9657305265150787e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1529809402094946
          entropy_coeff: 0.009999999999999998
          kl: 0.01026103127367427
          policy_loss: 0.05967237088415358
          total_loss: 0.056988892952601115
          vf_explained_var: 0.36193233728408813
          vf_loss: 0.008846312211567743
    num_agent_steps_sampled: 867000
    num_agent_steps_trained: 867000
    num_steps_sampled: 867000
    num_steps_trained: 867000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,867,23074.3,867000,-3.1876,-2.32,-4.75,321.01


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 868000
  custom_metrics: {}
  date: 2021-10-24_14-33-07
  done: false
  episode_len_mean: 321.72
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.1946999999999752
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 2996
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9657305265150787e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0353620972898272
          entropy_coeff: 0.009999999999999998
          kl: 0.013620674676598298
          policy_loss: 0.04242155986527602
          total_loss: 0.04084860256148709
          vf_explained_var: -0.01787044107913971
          vf_loss: 0.00878064059264337
    num_agent_steps_sampled: 868000
    num_agent_steps_trained: 868000
    num_steps_sampled: 868000
    num_steps_trained: 86800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,868,23104.9,868000,-3.1947,-2.32,-4.75,321.72


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 869000
  custom_metrics: {}
  date: 2021-10-24_14-33-39
  done: false
  episode_len_mean: 322.35
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2009999999999748
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 2999
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9657305265150787e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.146434775988261
          entropy_coeff: 0.009999999999999998
          kl: 0.013622028809224791
          policy_loss: 0.07041409744156732
          total_loss: 0.06696950511799918
          vf_explained_var: 0.3899853527545929
          vf_loss: 0.008019733043491012
    num_agent_steps_sampled: 869000
    num_agent_steps_trained: 869000
    num_steps_sampled: 869000
    num_steps_trained: 869000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,869,23136.6,869000,-3.201,-2.32,-4.75,322.35




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 870000
  custom_metrics: {}
  date: 2021-10-24_14-34-28
  done: false
  episode_len_mean: 322.86
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2060999999999753
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3002
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9657305265150787e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0708776142862109
          entropy_coeff: 0.009999999999999998
          kl: 0.013642881041234053
          policy_loss: 0.028248040792014865
          total_loss: 0.02614377070632246
          vf_explained_var: 0.46467721462249756
          vf_loss: 0.008604483341332524
    num_agent_steps_sampled: 870000
    num_agent_steps_trained: 870000
    num_steps_sampled: 870000
    num_steps_trained: 8700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,870,23186.2,870000,-3.2061,-2.32,-4.75,322.86


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 871000
  custom_metrics: {}
  date: 2021-10-24_14-34-58
  done: false
  episode_len_mean: 323.62
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2136999999999745
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3005
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9657305265150787e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8873287353250715
          entropy_coeff: 0.009999999999999998
          kl: 0.005182872058682619
          policy_loss: -0.07933379825618532
          total_loss: -0.07592994645237923
          vf_explained_var: 0.3748216927051544
          vf_loss: 0.012277131558706363
    num_agent_steps_sampled: 871000
    num_agent_steps_trained: 871000
    num_steps_sampled: 871000
    num_steps_trained: 8710

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,871,23215.9,871000,-3.2137,-2.32,-4.75,323.62


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 872000
  custom_metrics: {}
  date: 2021-10-24_14-35-29
  done: false
  episode_len_mean: 324.68
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.224299999999975
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3008
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9657305265150787e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.055175683233473
          entropy_coeff: 0.009999999999999998
          kl: 0.021075242529164404
          policy_loss: -0.1075401777608527
          total_loss: -0.10626623556017875
          vf_explained_var: 0.4830555021762848
          vf_loss: 0.011825664527714252
    num_agent_steps_sampled: 872000
    num_agent_steps_trained: 872000
    num_steps_sampled: 872000
    num_steps_trained: 872000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,872,23246.9,872000,-3.2243,-2.32,-4.75,324.68


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 873000
  custom_metrics: {}
  date: 2021-10-24_14-36-00
  done: false
  episode_len_mean: 325.68
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2342999999999744
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3011
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.948595789772618e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0848696066273584
          entropy_coeff: 0.009999999999999998
          kl: 0.01045293851750052
          policy_loss: -0.13823228602608045
          total_loss: -0.1392910318242179
          vf_explained_var: 0.6469506025314331
          vf_loss: 0.009789920867317253
    num_agent_steps_sampled: 873000
    num_agent_steps_trained: 873000
    num_steps_sampled: 873000
    num_steps_trained: 873000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,873,23278.3,873000,-3.2343,-2.32,-4.75,325.68


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 874000
  custom_metrics: {}
  date: 2021-10-24_14-36-32
  done: false
  episode_len_mean: 326.7
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.244499999999974
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 4
  episodes_total: 3015
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.948595789772618e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9354338182343377
          entropy_coeff: 0.009999999999999998
          kl: 0.006416346769192
          policy_loss: 0.0015701259175936381
          total_loss: -0.00043461041318045723
          vf_explained_var: 0.7452201843261719
          vf_loss: 0.007349584011050562
    num_agent_steps_sampled: 874000
    num_agent_steps_trained: 874000
    num_steps_sampled: 874000
    num_steps_trained: 874000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,874,23309.3,874000,-3.2445,-2.32,-4.75,326.7


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 875000
  custom_metrics: {}
  date: 2021-10-24_14-37-02
  done: false
  episode_len_mean: 327.49
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.252399999999975
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3018
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.948595789772618e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9417108992735544
          entropy_coeff: 0.009999999999999998
          kl: 0.006920384694684984
          policy_loss: 0.02139736173881425
          total_loss: 0.01880589135819011
          vf_explained_var: 0.6618660092353821
          vf_loss: 0.006825622191859616
    num_agent_steps_sampled: 875000
    num_agent_steps_trained: 875000
    num_steps_sampled: 875000
    num_steps_trained: 875000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,875,23339.8,875000,-3.2524,-2.32,-4.75,327.49


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 876000
  custom_metrics: {}
  date: 2021-10-24_14-37-32
  done: false
  episode_len_mean: 328.33
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2607999999999744
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3021
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.948595789772618e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8284119745095571
          entropy_coeff: 0.009999999999999998
          kl: 0.01008492037091639
          policy_loss: 0.04387798665298356
          total_loss: 0.043505418300628665
          vf_explained_var: 0.6651968359947205
          vf_loss: 0.007911526872259047
    num_agent_steps_sampled: 876000
    num_agent_steps_trained: 876000
    num_steps_sampled: 876000
    num_steps_trained: 876000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,876,23369.6,876000,-3.2608,-2.32,-4.75,328.33


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 877000
  custom_metrics: {}
  date: 2021-10-24_14-38-02
  done: false
  episode_len_mean: 329.43
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.271799999999973
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3024
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.948595789772618e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7882462726698981
          entropy_coeff: 0.009999999999999998
          kl: 0.005835164802327518
          policy_loss: 0.05405792188313272
          total_loss: 0.05545240417122841
          vf_explained_var: 0.5529985427856445
          vf_loss: 0.009276931453496218
    num_agent_steps_sampled: 877000
    num_agent_steps_trained: 877000
    num_steps_sampled: 877000
    num_steps_trained: 877000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,877,23400.1,877000,-3.2718,-2.32,-4.75,329.43


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 878000
  custom_metrics: {}
  date: 2021-10-24_14-38-35
  done: false
  episode_len_mean: 329.87
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2761999999999727
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3027
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.948595789772618e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6039283957746294
          entropy_coeff: 0.009999999999999998
          kl: 0.005138617926264525
          policy_loss: 0.00437603493531545
          total_loss: 0.007845055477486717
          vf_explained_var: 0.5329447388648987
          vf_loss: 0.009508291511641195
    num_agent_steps_sampled: 878000
    num_agent_steps_trained: 878000
    num_steps_sampled: 878000
    num_steps_trained: 878000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,878,23433,878000,-3.2762,-2.32,-4.75,329.87




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 879000
  custom_metrics: {}
  date: 2021-10-24_14-39-24
  done: false
  episode_len_mean: 330.09
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.278399999999973
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3030
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.948595789772618e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6750765217675103
          entropy_coeff: 0.009999999999999998
          kl: 0.0032046447512127448
          policy_loss: -0.11988410246041085
          total_loss: -0.11377069643802112
          vf_explained_var: 0.46603816747665405
          vf_loss: 0.012864161572522587
    num_agent_steps_sampled: 879000
    num_agent_steps_trained: 879000
    num_steps_sampled: 879000
    num_steps_trained: 8790

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,879,23482.1,879000,-3.2784,-2.32,-4.75,330.09


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 880000
  custom_metrics: {}
  date: 2021-10-24_14-39-58
  done: false
  episode_len_mean: 330.64
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.283899999999974
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 4
  episodes_total: 3034
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.474297894886309e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6175986064804925
          entropy_coeff: 0.009999999999999998
          kl: 0.004174677942155351
          policy_loss: 0.0026214684877130722
          total_loss: 0.00931916952961021
          vf_explained_var: 0.409909188747406
          vf_loss: 0.012873685328910749
    num_agent_steps_sampled: 880000
    num_agent_steps_trained: 880000
    num_steps_sampled: 880000
    num_steps_trained: 880000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,880,23515.4,880000,-3.2839,-2.32,-4.75,330.64


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 881000
  custom_metrics: {}
  date: 2021-10-24_14-40-30
  done: false
  episode_len_mean: 330.66
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2840999999999734
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3037
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.371489474431545e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5413886825243632
          entropy_coeff: 0.009999999999999998
          kl: 0.004141632285964134
          policy_loss: 0.07721472150749631
          total_loss: 0.08085449751880434
          vf_explained_var: 0.5026817321777344
          vf_loss: 0.009053658954669825
    num_agent_steps_sampled: 881000
    num_agent_steps_trained: 881000
    num_steps_sampled: 881000
    num_steps_trained: 881000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,881,23547.7,881000,-3.2841,-2.32,-4.75,330.66


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 882000
  custom_metrics: {}
  date: 2021-10-24_14-41-04
  done: false
  episode_len_mean: 330.57
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2831999999999737
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3040
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6857447372157725e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5218933237923516
          entropy_coeff: 0.009999999999999998
          kl: 0.002986691899040592
          policy_loss: -0.09753498054212995
          total_loss: -0.08955834325816896
          vf_explained_var: 0.28203240036964417
          vf_loss: 0.013195568602532149
    num_agent_steps_sampled: 882000
    num_agent_steps_trained: 882000
    num_steps_sampled: 882000
    num_steps_trained: 882

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,882,23581.1,882000,-3.2832,-2.32,-4.75,330.57


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 883000
  custom_metrics: {}
  date: 2021-10-24_14-41-35
  done: false
  episode_len_mean: 329.24
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.269899999999973
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 4
  episodes_total: 3044
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8428723686078863e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.55816860265202
          entropy_coeff: 0.009999999999999998
          kl: 0.005910972998613791
          policy_loss: 0.0095374776257409
          total_loss: 0.017115946859121323
          vf_explained_var: 0.3397994041442871
          vf_loss: 0.013160156251655685
    num_agent_steps_sampled: 883000
    num_agent_steps_trained: 883000
    num_steps_sampled: 883000
    num_steps_trained: 883000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,883,23612.7,883000,-3.2699,-2.32,-4.75,329.24


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 884000
  custom_metrics: {}
  date: 2021-10-24_14-42-08
  done: false
  episode_len_mean: 328.56
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.263099999999974
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3047
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8428723686078863e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5161735259824329
          entropy_coeff: 0.009999999999999998
          kl: 0.00446121526508168
          policy_loss: 0.05420710345109304
          total_loss: 0.057743267218271895
          vf_explained_var: 0.4755716025829315
          vf_loss: 0.008697898145894417
    num_agent_steps_sampled: 884000
    num_agent_steps_trained: 884000
    num_steps_sampled: 884000
    num_steps_trained: 884000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,884,23645.3,884000,-3.2631,-2.32,-4.75,328.56


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 885000
  custom_metrics: {}
  date: 2021-10-24_14-42-43
  done: false
  episode_len_mean: 327.19
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2493999999999734
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3050
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.214361843039431e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.5417231996854146
          entropy_coeff: 0.009999999999999998
          kl: 0.005460502389990059
          policy_loss: -0.09560434164272415
          total_loss: -0.08806709655457073
          vf_explained_var: 0.35335803031921387
          vf_loss: 0.012954478731585874
    num_agent_steps_sampled: 885000
    num_agent_steps_trained: 885000
    num_steps_sampled: 885000
    num_steps_trained: 8850

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,885,23680.2,885000,-3.2494,-2.32,-4.75,327.19


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 886000
  custom_metrics: {}
  date: 2021-10-24_14-43-15
  done: false
  episode_len_mean: 325.7
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2344999999999744
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 4
  episodes_total: 3054
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.214361843039431e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.7142310976982117
          entropy_coeff: 0.009999999999999998
          kl: 0.009703062591672622
          policy_loss: 0.03513684769471486
          total_loss: 0.039169737613863415
          vf_explained_var: 0.4428485929965973
          vf_loss: 0.011175200301739905
    num_agent_steps_sampled: 886000
    num_agent_steps_trained: 886000
    num_steps_sampled: 886000
    num_steps_trained: 886000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,886,23712.4,886000,-3.2345,-2.32,-4.75,325.7


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 887000
  custom_metrics: {}
  date: 2021-10-24_14-43-46
  done: false
  episode_len_mean: 325.04
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2278999999999747
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3057
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.214361843039431e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.660866622461213
          entropy_coeff: 0.009999999999999998
          kl: 0.06762405489971263
          policy_loss: 0.11537427571084764
          total_loss: 0.11432066849536365
          vf_explained_var: 0.7117177248001099
          vf_loss: 0.005555052059288654
    num_agent_steps_sampled: 887000
    num_agent_steps_trained: 887000
    num_steps_sampled: 887000
    num_steps_trained: 887000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,887,23743.3,887000,-3.2279,-2.32,-4.75,325.04




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 888000
  custom_metrics: {}
  date: 2021-10-24_14-44-33
  done: false
  episode_len_mean: 325.02
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2276999999999743
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3060
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3821542764559145e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9804315712716845
          entropy_coeff: 0.009999999999999998
          kl: 0.019251083354031827
          policy_loss: -0.09943819642066956
          total_loss: -0.10076255152622858
          vf_explained_var: 0.6818151473999023
          vf_loss: 0.00847995796551307
    num_agent_steps_sampled: 888000
    num_agent_steps_trained: 888000
    num_steps_sampled: 888000
    num_steps_trained: 88800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,888,23790.5,888000,-3.2277,-2.32,-4.75,325.02


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 889000
  custom_metrics: {}
  date: 2021-10-24_14-45-02
  done: false
  episode_len_mean: 325.94
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2368999999999746
  episode_reward_min: -4.749999999999943
  episodes_this_iter: 3
  episodes_total: 3063
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3821542764559145e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0169642673598396
          entropy_coeff: 0.009999999999999998
          kl: 0.025242778553490552
          policy_loss: -0.07823956981301308
          total_loss: -0.0789436012506485
          vf_explained_var: 0.5659016966819763
          vf_loss: 0.009465611037901707
    num_agent_steps_sampled: 889000
    num_agent_steps_trained: 889000
    num_steps_sampled: 889000
    num_steps_trained: 88900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,889,23819.9,889000,-3.2369,-2.32,-4.75,325.94


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 890000
  custom_metrics: {}
  date: 2021-10-24_14-45-28
  done: false
  episode_len_mean: 327.7
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.284199999999974
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3066
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0732314146838724e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2117137167188856
          entropy_coeff: 0.009999999999999998
          kl: 0.021923642023888353
          policy_loss: 0.05968795066906346
          total_loss: 0.09074555283619298
          vf_explained_var: -0.203145831823349
          vf_loss: 0.043174737792772554
    num_agent_steps_sampled: 890000
    num_agent_steps_trained: 890000
    num_steps_sampled: 890000
    num_steps_trained: 890000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,890,23845.4,890000,-3.2842,-2.32,-6.96,327.7


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 891000
  custom_metrics: {}
  date: 2021-10-24_14-45-55
  done: false
  episode_len_mean: 329.2
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2991999999999746
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3069
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.109847122025808e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1367357068591648
          entropy_coeff: 0.009999999999999998
          kl: 0.11179304758125694
          policy_loss: 0.010766646928257413
          total_loss: 0.009913569026523166
          vf_explained_var: 0.4826815724372864
          vf_loss: 0.01051424883901038
    num_agent_steps_sampled: 891000
    num_agent_steps_trained: 891000
    num_steps_sampled: 891000
    num_steps_trained: 891000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,891,23872.5,891000,-3.2992,-2.32,-6.96,329.2


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 892000
  custom_metrics: {}
  date: 2021-10-24_14-46-23
  done: false
  episode_len_mean: 330.06
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.307799999999973
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3072
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.6647706830387125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0290263540214963
          entropy_coeff: 0.009999999999999998
          kl: 0.01858621753685507
          policy_loss: 0.09438242779837715
          total_loss: 0.09427644792530272
          vf_explained_var: 0.21951985359191895
          vf_loss: 0.01018428162464665
    num_agent_steps_sampled: 892000
    num_agent_steps_trained: 892000
    num_steps_sampled: 892000
    num_steps_trained: 892000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,892,23900.3,892000,-3.3078,-2.32,-6.96,330.06


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 893000
  custom_metrics: {}
  date: 2021-10-24_14-46-53
  done: false
  episode_len_mean: 329.03
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2974999999999732
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3075
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.6647706830387125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6630650066667133
          entropy_coeff: 0.009999999999999998
          kl: 0.008623232514296102
          policy_loss: 0.07263577878475189
          total_loss: 0.07587586442629496
          vf_explained_var: 0.47677260637283325
          vf_loss: 0.009870733061365575
    num_agent_steps_sampled: 893000
    num_agent_steps_trained: 893000
    num_steps_sampled: 893000
    num_steps_trained: 89300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,893,23930,893000,-3.2975,-2.32,-6.96,329.03


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 894000
  custom_metrics: {}
  date: 2021-10-24_14-47-23
  done: false
  episode_len_mean: 325.89
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.266099999999974
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3078
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.6647706830387125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7843548029661178
          entropy_coeff: 0.009999999999999998
          kl: 0.01936237569564838
          policy_loss: 0.10926644305388132
          total_loss: 0.10721593780650032
          vf_explained_var: 0.3998672068119049
          vf_loss: 0.005793038117311274
    num_agent_steps_sampled: 894000
    num_agent_steps_trained: 894000
    num_steps_sampled: 894000
    num_steps_trained: 894000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,894,23960.7,894000,-3.2661,-2.32,-6.96,325.89


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 895000
  custom_metrics: {}
  date: 2021-10-24_14-47-55
  done: false
  episode_len_mean: 323.23
  episode_media: {}
  episode_reward_max: -2.3199999999999674
  episode_reward_mean: -3.2399999999999745
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3081
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.6647706830387125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6088803297943539
          entropy_coeff: 0.009999999999999998
          kl: 0.010313113507604754
          policy_loss: -0.05399209823873308
          total_loss: -0.04699381697509024
          vf_explained_var: 0.16597653925418854
          vf_loss: 0.013087080031012496
    num_agent_steps_sampled: 895000
    num_agent_steps_trained: 895000
    num_steps_sampled: 895000
    num_steps_trained: 895

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,895,23992,895000,-3.24,-2.32,-6.96,323.23


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 896000
  custom_metrics: {}
  date: 2021-10-24_14-48-21
  done: false
  episode_len_mean: 319.9
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.228699999999975
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3084
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.6647706830387125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1037249777052138
          entropy_coeff: 0.009999999999999998
          kl: 0.01520721380911958
          policy_loss: -0.10071531708041827
          total_loss: -0.09711545391215218
          vf_explained_var: 0.2990367114543915
          vf_loss: 0.014637103951018717
    num_agent_steps_sampled: 896000
    num_agent_steps_trained: 896000
    num_steps_sampled: 896000
    num_steps_trained: 896000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,896,24018.1,896000,-3.2287,-2.72,-6.96,319.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 897000
  custom_metrics: {}
  date: 2021-10-24_14-48-48
  done: false
  episode_len_mean: 319.63
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.2259999999999747
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3087
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.6647706830387125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1216315527757008
          entropy_coeff: 0.009999999999999998
          kl: 0.01074902719328258
          policy_loss: 0.056165639145506756
          total_loss: 0.05592077399293582
          vf_explained_var: 0.09068027883768082
          vf_loss: 0.010971450567012653
    num_agent_steps_sampled: 897000
    num_agent_steps_trained: 897000
    num_steps_sampled: 897000
    num_steps_trained: 897000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,897,24044.9,897000,-3.226,-2.72,-6.96,319.63




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 898000
  custom_metrics: {}
  date: 2021-10-24_14-49-31
  done: false
  episode_len_mean: 320.16
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.2312999999999756
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3090
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.6647706830387125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.3297556228107876
          entropy_coeff: 0.009999999999999998
          kl: 0.013909675926491439
          policy_loss: 0.005862179398536682
          total_loss: 0.0027122407737705445
          vf_explained_var: 0.1205274909734726
          vf_loss: 0.010147611633712788
    num_agent_steps_sampled: 898000
    num_agent_steps_trained: 898000
    num_steps_sampled: 898000
    num_steps_trained: 8980

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,898,24088.2,898000,-3.2313,-2.72,-6.96,320.16


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 899000
  custom_metrics: {}
  date: 2021-10-24_14-50-00
  done: false
  episode_len_mean: 320.9
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.238699999999976
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 2
  episodes_total: 3092
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.6647706830387125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2164143708017137
          entropy_coeff: 0.009999999999999998
          kl: 0.012357065131848958
          policy_loss: -0.08996768825583988
          total_loss: -0.08864288752277692
          vf_explained_var: 0.07108862698078156
          vf_loss: 0.013488942560636335
    num_agent_steps_sampled: 899000
    num_agent_steps_trained: 899000
    num_steps_sampled: 899000
    num_steps_trained: 899000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,899,24117.2,899000,-3.2387,-2.72,-6.96,320.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 900000
  custom_metrics: {}
  date: 2021-10-24_14-50-29
  done: false
  episode_len_mean: 321.61
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.2457999999999743
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3095
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.6647706830387125e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9200557854440478
          entropy_coeff: 0.009999999999999998
          kl: 0.02572379205942323
          policy_loss: -0.10970156374904845
          total_loss: -0.10289681947065724
          vf_explained_var: 0.22168293595314026
          vf_loss: 0.01600529464582602
    num_agent_steps_sampled: 900000
    num_agent_steps_trained: 900000
    num_steps_sampled: 900000
    num_steps_trained: 900000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,900,24145.9,900000,-3.2458,-2.72,-6.96,321.61


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 901000
  custom_metrics: {}
  date: 2021-10-24_14-51-00
  done: false
  episode_len_mean: 320.91
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.238799999999975
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 4
  episodes_total: 3099
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.997156024558069e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.295022147231632
          entropy_coeff: 0.009999999999999998
          kl: 0.013253775914268375
          policy_loss: -0.00043337717652320863
          total_loss: -0.0005058219863308801
          vf_explained_var: 0.4289683401584625
          vf_loss: 0.012877772479421562
    num_agent_steps_sampled: 901000
    num_agent_steps_trained: 901000
    num_steps_sampled: 901000
    num_steps_trained: 901

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,901,24176.8,901000,-3.2388,-2.72,-6.96,320.91


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 902000
  custom_metrics: {}
  date: 2021-10-24_14-51-31
  done: false
  episode_len_mean: 320.88
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.238499999999974
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3102
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.997156024558069e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.4386180996894837
          entropy_coeff: 0.009999999999999998
          kl: 0.010118474997729205
          policy_loss: 0.027171524365743
          total_loss: 0.022143538627359603
          vf_explained_var: 0.46890321373939514
          vf_loss: 0.009358188895405167
    num_agent_steps_sampled: 902000
    num_agent_steps_trained: 902000
    num_steps_sampled: 902000
    num_steps_trained: 902000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,902,24208.1,902000,-3.2385,-2.72,-6.96,320.88


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 903000
  custom_metrics: {}
  date: 2021-10-24_14-52-02
  done: false
  episode_len_mean: 320.93
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.2389999999999746
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3105
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.997156024558069e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.5388104557991027
          entropy_coeff: 0.009999999999999998
          kl: 0.022523338881194614
          policy_loss: 0.044537577198611365
          total_loss: 0.0388840070201291
          vf_explained_var: 0.28981006145477295
          vf_loss: 0.009734523192875915
    num_agent_steps_sampled: 903000
    num_agent_steps_trained: 903000
    num_steps_sampled: 903000
    num_steps_trained: 903000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,903,24238.6,903000,-3.239,-2.72,-6.96,320.93


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 904000
  custom_metrics: {}
  date: 2021-10-24_14-52-32
  done: false
  episode_len_mean: 320.14
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.2310999999999757
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3108
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.04957340368371e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1963421649403043
          entropy_coeff: 0.009999999999999998
          kl: 0.01748130937384044
          policy_loss: -0.11844316514001953
          total_loss: -0.11940004138482942
          vf_explained_var: 0.5431554913520813
          vf_loss: 0.011006529091133012
    num_agent_steps_sampled: 904000
    num_agent_steps_trained: 904000
    num_steps_sampled: 904000
    num_steps_trained: 904000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,904,24269.5,904000,-3.2311,-2.72,-6.96,320.14


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 905000
  custom_metrics: {}
  date: 2021-10-24_14-53-04
  done: false
  episode_len_mean: 319.57
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.225399999999975
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 4
  episodes_total: 3112
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.04957340368371e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.0063809825314416
          entropy_coeff: 0.009999999999999998
          kl: 0.01697914477949267
          policy_loss: -0.05517953129278289
          total_loss: -0.054919251634014975
          vf_explained_var: 0.5350525975227356
          vf_loss: 0.010324077168479561
    num_agent_steps_sampled: 905000
    num_agent_steps_trained: 905000
    num_steps_sampled: 905000
    num_steps_trained: 905000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,905,24301.1,905000,-3.2254,-2.72,-6.96,319.57


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 906000
  custom_metrics: {}
  date: 2021-10-24_14-53-31
  done: false
  episode_len_mean: 320.88
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.2384999999999757
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 2
  episodes_total: 3114
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.04957340368371e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9176500876744588
          entropy_coeff: 0.009999999999999998
          kl: 0.011562201653525348
          policy_loss: -0.0808603286743164
          total_loss: -0.07979433875944879
          vf_explained_var: 0.39512526988983154
          vf_loss: 0.010242482973262667
    num_agent_steps_sampled: 906000
    num_agent_steps_trained: 906000
    num_steps_sampled: 906000
    num_steps_trained: 906000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,906,24328.2,906000,-3.2385,-2.72,-6.96,320.88


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 907000
  custom_metrics: {}
  date: 2021-10-24_14-53-59
  done: false
  episode_len_mean: 321.49
  episode_media: {}
  episode_reward_max: -2.719999999999986
  episode_reward_mean: -3.2445999999999753
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3117
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.04957340368371e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9373373495207893
          entropy_coeff: 0.009999999999999998
          kl: 0.008484317940480126
          policy_loss: -0.10507725261979634
          total_loss: -0.10416703224182129
          vf_explained_var: 0.5576027035713196
          vf_loss: 0.010283584049385454
    num_agent_steps_sampled: 907000
    num_agent_steps_trained: 907000
    num_steps_sampled: 907000
    num_steps_trained: 907000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,907,24355.9,907000,-3.2446,-2.72,-6.96,321.49




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 908000
  custom_metrics: {}
  date: 2021-10-24_14-54-49
  done: false
  episode_len_mean: 320.52
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.2348999999999752
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 4
  episodes_total: 3121
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.04957340368371e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7102284875180986
          entropy_coeff: 0.009999999999999998
          kl: 0.005097581236892602
          policy_loss: -0.046223192910353345
          total_loss: -0.04532612851924366
          vf_explained_var: 0.6851803660392761
          vf_loss: 0.007999345339420769
    num_agent_steps_sampled: 908000
    num_agent_steps_trained: 908000
    num_steps_sampled: 908000
    num_steps_trained: 90800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,908,24406,908000,-3.2349,-2.61,-6.96,320.52


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 909000
  custom_metrics: {}
  date: 2021-10-24_14-55-17
  done: false
  episode_len_mean: 321.43
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.2439999999999753
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 2
  episodes_total: 3123
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.04957340368371e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.9945093704594506
          entropy_coeff: 0.009999999999999998
          kl: 0.04953008357720845
          policy_loss: -0.017760294261905882
          total_loss: -0.018378120329644945
          vf_explained_var: 0.6760273575782776
          vf_loss: 0.009327214255204632
    num_agent_steps_sampled: 909000
    num_agent_steps_trained: 909000
    num_steps_sampled: 909000
    num_steps_trained: 90900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,909,24433.9,909000,-3.244,-2.61,-6.96,321.43


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 910000
  custom_metrics: {}
  date: 2021-10-24_14-55-48
  done: false
  episode_len_mean: 321.48
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.244499999999975
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 4
  episodes_total: 3127
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.574360105525565e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.7591665450069639
          entropy_coeff: 0.009999999999999998
          kl: 0.029594796174393326
          policy_loss: -0.03205205574631691
          total_loss: -0.031302675728996594
          vf_explained_var: 0.5876702070236206
          vf_loss: 0.008341002200419704
    num_agent_steps_sampled: 910000
    num_agent_steps_trained: 910000
    num_steps_sampled: 910000
    num_steps_trained: 91000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,910,24465.4,910000,-3.2445,-2.61,-6.96,321.48


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 911000
  custom_metrics: {}
  date: 2021-10-24_14-56-22
  done: false
  episode_len_mean: 321.39
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.2435999999999745
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3130
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.361540158288347e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5201512760586209
          entropy_coeff: 0.009999999999999998
          kl: 0.027961108779652792
          policy_loss: 0.06128378783663114
          total_loss: 0.062231760720411936
          vf_explained_var: 0.588551938533783
          vf_loss: 0.006149421719601378
    num_agent_steps_sampled: 911000
    num_agent_steps_trained: 911000
    num_steps_sampled: 911000
    num_steps_trained: 911000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,911,24498.8,911000,-3.2436,-2.61,-6.96,321.39


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 912000
  custom_metrics: {}
  date: 2021-10-24_14-56-52
  done: false
  episode_len_mean: 321.19
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.2415999999999747
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3133
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5423102374325197e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6686349117093616
          entropy_coeff: 0.009999999999999998
          kl: 0.009726903432150793
          policy_loss: -0.09029314915339152
          total_loss: -0.08762142302261458
          vf_explained_var: 0.3914180397987366
          vf_loss: 0.009358041345452268
    num_agent_steps_sampled: 912000
    num_agent_steps_trained: 912000
    num_steps_sampled: 912000
    num_steps_trained: 9120

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,912,24528.9,912000,-3.2416,-2.61,-6.96,321.19


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 913000
  custom_metrics: {}
  date: 2021-10-24_14-57-26
  done: false
  episode_len_mean: 321.04
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.2400999999999756
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 4
  episodes_total: 3137
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5423102374325197e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5375603238741556
          entropy_coeff: 0.009999999999999998
          kl: 0.005116942659102443
          policy_loss: 0.008928475942876603
          total_loss: 0.01355018996530109
          vf_explained_var: 0.3805585503578186
          vf_loss: 0.009997301905726393
    num_agent_steps_sampled: 913000
    num_agent_steps_trained: 913000
    num_steps_sampled: 913000
    num_steps_trained: 91300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,913,24562.4,913000,-3.2401,-2.61,-6.96,321.04


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 914000
  custom_metrics: {}
  date: 2021-10-24_14-57-58
  done: false
  episode_len_mean: 321.21
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.2417999999999756
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3140
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5423102374325197e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6279765129089355
          entropy_coeff: 0.009999999999999998
          kl: 0.00858683536039785
          policy_loss: 0.053105664915508694
          total_loss: 0.05403948956065708
          vf_explained_var: 0.40896865725517273
          vf_loss: 0.007213559575999776
    num_agent_steps_sampled: 914000
    num_agent_steps_trained: 914000
    num_steps_sampled: 914000
    num_steps_trained: 91400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,914,24594.8,914000,-3.2418,-2.61,-6.96,321.21


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 915000
  custom_metrics: {}
  date: 2021-10-24_14-58-30
  done: false
  episode_len_mean: 321.24
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.2420999999999753
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3143
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5423102374325197e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.5652457803487778
          entropy_coeff: 0.009999999999999998
          kl: 0.019675183003994807
          policy_loss: -0.06221788566973474
          total_loss: -0.05690447207954195
          vf_explained_var: 0.31694161891937256
          vf_loss: 0.010965801227009958
    num_agent_steps_sampled: 915000
    num_agent_steps_trained: 915000
    num_steps_sampled: 915000
    num_steps_trained: 915

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,915,24627,915000,-3.2421,-2.61,-6.96,321.24


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 916000
  custom_metrics: {}
  date: 2021-10-24_14-59-03
  done: false
  episode_len_mean: 321.51
  episode_media: {}
  episode_reward_max: -2.6099999999999883
  episode_reward_mean: -3.244799999999975
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 4
  episodes_total: 3147
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5423102374325197e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.41287174622217815
          entropy_coeff: 0.009999999999999998
          kl: 0.003387428418052865
          policy_loss: -0.016592579376366403
          total_loss: -0.010188336008124882
          vf_explained_var: 0.39796850085258484
          vf_loss: 0.010532950320177608
    num_agent_steps_sampled: 916000
    num_agent_steps_trained: 916000
    num_steps_sampled: 916000
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,916,24659.8,916000,-3.2448,-2.61,-6.96,321.51




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 917000
  custom_metrics: {}
  date: 2021-10-24_14-59-52
  done: false
  episode_len_mean: 321.19
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.2415999999999747
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3150
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7711551187162599e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.4628169920709398
          entropy_coeff: 0.009999999999999998
          kl: 0.006641864378681264
          policy_loss: 0.04594561027155982
          total_loss: 0.04873049548930592
          vf_explained_var: 0.48966625332832336
          vf_loss: 0.007413046035475822
    num_agent_steps_sampled: 917000
    num_agent_steps_trained: 917000
    num_steps_sampled: 917000
    num_steps_trained: 91700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,917,24708.9,917000,-3.2416,-2.59,-6.96,321.19


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 918000
  custom_metrics: {}
  date: 2021-10-24_15-00-26
  done: false
  episode_len_mean: 321.19
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.2415999999999747
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3153
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7711551187162599e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.45649483501911164
          entropy_coeff: 0.009999999999999998
          kl: 0.007322492714669614
          policy_loss: -0.11303567621443007
          total_loss: -0.10643427032563421
          vf_explained_var: 0.2533276379108429
          vf_loss: 0.011166342492732738
    num_agent_steps_sampled: 918000
    num_agent_steps_trained: 918000
    num_steps_sampled: 918000
    num_steps_trained: 918

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,918,24742.8,918000,-3.2416,-2.59,-6.96,321.19


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 919000
  custom_metrics: {}
  date: 2021-10-24_15-00-58
  done: false
  episode_len_mean: 321.07
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.2403999999999744
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 4
  episodes_total: 3157
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7711551187162599e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.39558067984051176
          entropy_coeff: 0.009999999999999998
          kl: 0.0024259809766577845
          policy_loss: 0.035525697304142845
          total_loss: 0.042184004601505065
          vf_explained_var: 0.25197574496269226
          vf_loss: 0.01061410785963138
    num_agent_steps_sampled: 919000
    num_agent_steps_trained: 919000
    num_steps_sampled: 919000
    num_steps_trained: 91

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,919,24774.6,919000,-3.2404,-2.59,-6.96,321.07


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 920000
  custom_metrics: {}
  date: 2021-10-24_15-01-29
  done: false
  episode_len_mean: 320.45
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.234199999999975
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 3
  episodes_total: 3160
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.855775593581299e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.3979245798455344
          entropy_coeff: 0.009999999999999998
          kl: 0.011705018504130749
          policy_loss: 0.02555676665571001
          total_loss: 0.02878173560731941
          vf_explained_var: 0.2260773479938507
          vf_loss: 0.0072042046554593574
    num_agent_steps_sampled: 920000
    num_agent_steps_trained: 920000
    num_steps_sampled: 920000
    num_steps_trained: 920000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,920,24806.1,920000,-3.2342,-2.59,-6.96,320.45


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 921000
  custom_metrics: {}
  date: 2021-10-24_15-02-02
  done: false
  episode_len_mean: 318.94
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.2190999999999756
  episode_reward_min: -6.959999999999925
  episodes_this_iter: 4
  episodes_total: 3164
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.855775593581299e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.37607252995173135
          entropy_coeff: 0.009999999999999998
          kl: 0.006090749370499459
          policy_loss: 0.007931280384461085
          total_loss: 0.016337999577323594
          vf_explained_var: 0.258786678314209
          vf_loss: 0.01216744040656421
    num_agent_steps_sampled: 921000
    num_agent_steps_trained: 921000
    num_steps_sampled: 921000
    num_steps_trained: 921000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,921,24838.6,921000,-3.2191,-2.59,-6.96,318.94


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 922000
  custom_metrics: {}
  date: 2021-10-24_15-02-36
  done: false
  episode_len_mean: 316.97
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.1696999999999758
  episode_reward_min: -4.539999999999948
  episodes_this_iter: 3
  episodes_total: 3167
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.855775593581299e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7087898562351863
          entropy_coeff: 0.009999999999999998
          kl: 0.04648224346801493
          policy_loss: 0.06048825176225768
          total_loss: 0.06073038122720188
          vf_explained_var: 0.32039880752563477
          vf_loss: 0.0073299904198696215
    num_agent_steps_sampled: 922000
    num_agent_steps_trained: 922000
    num_steps_sampled: 922000
    num_steps_trained: 922000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,922,24872.5,922000,-3.1697,-2.59,-4.54,316.97


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 923000
  custom_metrics: {}
  date: 2021-10-24_15-03-06
  done: false
  episode_len_mean: 315.61
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.1560999999999764
  episode_reward_min: -4.539999999999948
  episodes_this_iter: 3
  episodes_total: 3170
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3283663390371952e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6546601802110672
          entropy_coeff: 0.009999999999999998
          kl: 0.07184579839288029
          policy_loss: 0.03078805117143525
          total_loss: 0.03214906288517846
          vf_explained_var: 0.35113176703453064
          vf_loss: 0.007907523100341981
    num_agent_steps_sampled: 923000
    num_agent_steps_trained: 923000
    num_steps_sampled: 923000
    num_steps_trained: 923000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,923,24902.7,923000,-3.1561,-2.59,-4.54,315.61


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 924000
  custom_metrics: {}
  date: 2021-10-24_15-03-38
  done: false
  episode_len_mean: 315.0
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.1499999999999773
  episode_reward_min: -4.539999999999948
  episodes_this_iter: 3
  episodes_total: 3173
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9925495085557925e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.6671566724777221
          entropy_coeff: 0.009999999999999998
          kl: 0.006015766079576679
          policy_loss: -0.11257549333903524
          total_loss: -0.10928834651907285
          vf_explained_var: 0.4066242277622223
          vf_loss: 0.009958707003129853
    num_agent_steps_sampled: 924000
    num_agent_steps_trained: 924000
    num_steps_sampled: 924000
    num_steps_trained: 92400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,924,24934.2,924000,-3.15,-2.59,-4.54,315


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 925000
  custom_metrics: {}
  date: 2021-10-24_15-04-09
  done: false
  episode_len_mean: 314.09
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.140899999999977
  episode_reward_min: -4.539999999999948
  episodes_this_iter: 4
  episodes_total: 3177
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9925495085557925e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.615198979443974
          entropy_coeff: 0.009999999999999998
          kl: 0.03873530757489539
          policy_loss: 0.02057269045876132
          total_loss: 0.02541348296735022
          vf_explained_var: 0.3196726143360138
          vf_loss: 0.010992709950854381
    num_agent_steps_sampled: 925000
    num_agent_steps_trained: 925000
    num_steps_sampled: 925000
    num_steps_trained: 925000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,925,24965.4,925000,-3.1409,-2.59,-4.54,314.09




Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 926000
  custom_metrics: {}
  date: 2021-10-24_15-04-59
  done: false
  episode_len_mean: 313.3
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.132999999999976
  episode_reward_min: -4.539999999999948
  episodes_this_iter: 3
  episodes_total: 3180
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.98882426283369e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.37737616366810267
          entropy_coeff: 0.009999999999999998
          kl: 0.002099443002548658
          policy_loss: 0.009551287525229983
          total_loss: 0.014143656690915425
          vf_explained_var: 0.24205784499645233
          vf_loss: 0.00836612508735723
    num_agent_steps_sampled: 926000
    num_agent_steps_trained: 926000
    num_steps_sampled: 926000
    num_steps_trained: 926000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,926,25015.1,926000,-3.133,-2.59,-4.54,313.3


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 927000
  custom_metrics: {}
  date: 2021-10-24_15-05-30
  done: false
  episode_len_mean: 311.9
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.1189999999999776
  episode_reward_min: -4.539999999999948
  episodes_this_iter: 4
  episodes_total: 3184
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.494412131416845e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.44730268319447836
          entropy_coeff: 0.009999999999999998
          kl: 0.005734949813342243
          policy_loss: 0.006868832972314623
          total_loss: 0.01458924917711152
          vf_explained_var: 0.2271452397108078
          vf_loss: 0.012193435099389818
    num_agent_steps_sampled: 927000
    num_agent_steps_trained: 927000
    num_steps_sampled: 927000
    num_steps_trained: 927000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,927,25046.8,927000,-3.119,-2.59,-4.54,311.9


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 928000
  custom_metrics: {}
  date: 2021-10-24_15-06-00
  done: false
  episode_len_mean: 309.84
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.098399999999977
  episode_reward_min: -4.539999999999948
  episodes_this_iter: 3
  episodes_total: 3187
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.494412131416845e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.35393349942233826
          entropy_coeff: 0.009999999999999998
          kl: 0.0058297054458217685
          policy_loss: 0.06350647542211744
          total_loss: 0.0700995233323839
          vf_explained_var: 0.2143067866563797
          vf_loss: 0.01013237618867101
    num_agent_steps_sampled: 928000
    num_agent_steps_trained: 928000
    num_steps_sampled: 928000
    num_steps_trained: 928000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,928,25076.8,928000,-3.0984,-2.59,-4.54,309.84


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 929000
  custom_metrics: {}
  date: 2021-10-24_15-06-34
  done: false
  episode_len_mean: 307.31
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.0730999999999784
  episode_reward_min: -4.539999999999948
  episodes_this_iter: 4
  episodes_total: 3191
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.494412131416845e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.300632132920954
          entropy_coeff: 0.009999999999999998
          kl: 0.002298599373173913
          policy_loss: -0.012761207007699542
          total_loss: -0.00344405182533794
          vf_explained_var: 0.22180555760860443
          vf_loss: 0.012323473321480883
    num_agent_steps_sampled: 929000
    num_agent_steps_trained: 929000
    num_steps_sampled: 929000
    num_steps_trained: 929000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,929,25110.8,929000,-3.0731,-2.59,-4.54,307.31


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 930000
  custom_metrics: {}
  date: 2021-10-24_15-07-07
  done: false
  episode_len_mean: 304.44
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.0443999999999796
  episode_reward_min: -4.539999999999948
  episodes_this_iter: 3
  episodes_total: 3194
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.472060657084225e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.2256522324350145
          entropy_coeff: 0.009999999999999998
          kl: 0.0051811386708585875
          policy_loss: 0.04941830899980333
          total_loss: 0.055829444858762955
          vf_explained_var: 0.36119750142097473
          vf_loss: 0.008667657730661126
    num_agent_steps_sampled: 930000
    num_agent_steps_trained: 930000
    num_steps_sampled: 930000
    num_steps_trained: 930000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,930,25143.9,930000,-3.0444,-2.59,-4.54,304.44


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 931000
  custom_metrics: {}
  date: 2021-10-24_15-07-40
  done: false
  episode_len_mean: 303.85
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.038499999999978
  episode_reward_min: -4.539999999999948
  episodes_this_iter: 3
  episodes_total: 3197
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.472060657084225e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.5104250282049179
          entropy_coeff: 0.009999999999999998
          kl: 0.16178103157001383
          policy_loss: -0.12399091728859478
          total_loss: -0.11759431329038408
          vf_explained_var: 0.3374548554420471
          vf_loss: 0.011500735927580132
    num_agent_steps_sampled: 931000
    num_agent_steps_trained: 931000
    num_steps_sampled: 931000
    num_steps_trained: 931000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,931,25176.9,931000,-3.0385,-2.59,-4.54,303.85


Result for PPO_my_env_722d3_00000:
  agent_timesteps_total: 932000
  custom_metrics: {}
  date: 2021-10-24_15-08-13
  done: false
  episode_len_mean: 303.15
  episode_media: {}
  episode_reward_max: -2.5899999999999888
  episode_reward_mean: -3.031499999999978
  episode_reward_min: -4.539999999999948
  episodes_this_iter: 4
  episodes_total: 3201
  experiment_id: e107d5add964470ebc331ff15020baaf
  hostname: 7948e0475857
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1208090985626335e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.18397226201163397
          entropy_coeff: 0.009999999999999998
          kl: 0.009603571202591846
          policy_loss: 0.03925429392192099
          total_loss: 0.04857671765817536
          vf_explained_var: 0.2786206007003784
          vf_loss: 0.011162138155971965
    num_agent_steps_sampled: 932000
    num_agent_steps_trained: 932000
    num_steps_sampled: 932000
    num_steps_trained: 932000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_722d3_00000,RUNNING,172.17.0.3:43204,932,25209.5,932000,-3.0315,-2.59,-4.54,303.15
