In [1]:
import torch 
from torch import nn

import ray
from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.utils.annotations import override

#from models import VisualEncoder
from train import *
from wrappers_2 import *



In [2]:
class VisualEncoder(nn.Module):
    """Convolutional image encoder that flattens its final feature map.

    The network is six ``Conv2d`` layers (kernel 2, stride 2, no padding),
    each followed by ``ELU``, and a final ``Flatten``. Channel widths go
    3 -> 32 -> 32 -> 64 -> 128 -> 256 -> 512. With an even input size every
    convolution halves the spatial resolution, so a 64x64 image yields a
    512-dimensional vector per sample.
    """

    def __init__(self):
        super().__init__()

        # Consecutive pairs give (in_channels, out_channels) of each conv.
        widths = (3, 32, 32, 64, 128, 256, 512)

        stack = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Conv2d(c_in, c_out, kernel_size=2, stride=2, padding=0))
            stack.append(nn.ELU())
        stack.append(nn.Flatten())

        # The attribute name and layer order define the state_dict keys
        # ("cnn.0.weight", "cnn.2.weight", ...), so they must stay as is.
        self.cnn = nn.Sequential(*stack)

    def forward(self, x):
        """Encode ``x`` (channels-first image batch) into flat feature vectors."""
        return self.cnn(x)

In [3]:
class MyModelClass(TorchModelV2, nn.Module):
    """RLlib torch model: frozen pretrained CNN encoder + BatchNorm + MLP heads.

    Observations are encoded by ``VisualEncoder`` (run under ``no_grad`` so it
    receives no gradients), normalised with a non-affine ``BatchNorm1d``, passed
    through a three-layer ELU MLP and finally mapped to action logits and a
    scalar state value.

    Args:
        obs_space: Observation space, forwarded to ``TorchModelV2``.
        action_space: Action space; ``action_space.n`` sets the number of
            action logits, so it must expose ``n``.
        num_outputs: Forwarded to ``TorchModelV2``.
        model_config: Forwarded to ``TorchModelV2``.
        name: Forwarded to ``TorchModelV2``.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
        nn.Module.__init__(self)
        features_dim = 512
        self.encoder = VisualEncoder()
        # Weights are always deserialised onto the CPU first; device placement
        # happens afterwards.
        self.encoder.load_state_dict(
            torch.load("/IGLU-Minecraft/models/AngelaCNN/encoder_weigths.pth", map_location=torch.device('cpu'))
        )
        self.bn = nn.BatchNorm1d(features_dim, affine=False)
        self.mlp = nn.Sequential(
            nn.Linear(features_dim, 256),
            nn.ELU(),
            nn.Linear(256, 256),
            nn.ELU(),
            nn.Linear(256, 256),
            nn.ELU(),
        )
        self.action_head = nn.Linear(256, action_space.n)
        self.value_head = nn.Linear(256, 1)
        # Value estimate cached by the most recent forward() call.
        self.last_value = None

        self.use_cuda = torch.cuda.is_available()
        if self.use_cuda:
            # Move the whole module at once so that *every* sub-module ends up
            # on the same device. Moving sub-modules one by one previously
            # skipped ``self.bn``, leaving its running statistics on the CPU.
            self.cuda()

    @override(TorchModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action logits and cache the value estimate.

        Args:
            input_dict: Dict whose ``'obs'`` entry is a 4-D channels-last image
                batch (it is permuted to channels-first here). Pixel values are
                presumably in [0, 255] given the division below -- confirm
                against the environment wrappers.
            state: Recurrent state; returned unchanged.
            seq_lens: Unused.

        Returns:
            Tuple ``(action_logits, state)``.
        """
        obs = input_dict['obs'].permute(0, 3, 1, 2).float() / 255.0
        # Tensor.cuda()/Tensor.to() are out-of-place: the result must be kept.
        # Following the encoder's own device keeps input and weights together
        # wherever the model currently lives.
        obs = obs.to(next(self.encoder.parameters()).device)

        # The pretrained encoder is frozen: no gradients flow into it.
        with torch.no_grad():
            features = self.encoder(obs)
        features = self.bn(features)
        features = self.mlp(features)
        action = self.action_head(features)
        self.last_value = self.value_head(features).squeeze(1)
        return action, state

    @override(TorchModelV2)
    def value_function(self):
        """Return the value estimates produced by the last ``forward()`` call."""
        assert self.last_value is not None, "must call forward() first"
        return self.last_value

In [4]:
# Register the model under the name that the training config's
# "custom_model" entry refers to.
ModelCatalog.register_custom_model("my_torch_model", MyModelClass)

In [5]:
import os
# Select which GPU is visible: devices are ordered by PCI bus ID and only
# device "0" is exposed.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

class RewardWrapper(gym.RewardWrapper):
    """Reward wrapper that replaces zero rewards with a small penalty.

    A reward equal to 0 becomes -0.01; every other reward is passed through
    unchanged. Construction is inherited from ``gym.RewardWrapper``.
    """

    def reward(self, rew):
        """Return -0.01 when ``rew`` equals 0, otherwise ``rew`` itself."""
        return -0.01 if rew == 0 else rew
    
def env_creator(env_config):
    """Build the wrapped IGLU environment used for training.

    Creates ``IGLUSilentBuilder-v0`` with ``max_steps=1000``, restricts its
    task set to the ``'C32'`` preset and then applies, in order,
    ``PovOnlyWrapper``, ``SelectAndPlace``, ``Discretization`` (with the
    ``'human-level'`` flat action space) and ``RewardWrapper``.

    Args:
        env_config: Environment config supplied by the caller; not used here.

    Returns:
        The fully wrapped environment.
    """
    base_env = gym.make('IGLUSilentBuilder-v0', max_steps=1000)
    base_env.update_taskset(TaskSet(preset=['C32']))

    pov_env = PovOnlyWrapper(base_env)
    placing_env = SelectAndPlace(pov_env)
    discrete_env = Discretization(placing_env, flat_action_space('human-level'))
    return RewardWrapper(discrete_env)

from ray.tune.registry import register_env
# Make the environment factory available to RLlib under the name "my_env",
# which is the value used for the "env" key of the training config.
register_env("my_env", env_creator)

from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

In [6]:
from ray.tune.integration.wandb import WandbLogger

# Launch PPO training on the registered "my_env" environment with the custom
# torch model, reporting through WandbLogger in addition to Tune's own output.
# NOTE(review): no `stop` criterion is passed, so the run presumably continues
# until it is interrupted -- confirm this is intended.
analysis = tune.run(PPOTrainer, 
         config={
             "env": "my_env", 
             "framework": "torch",
             "num_gpus": 1,
             "num_workers": 1,
             # PPO optimisation hyper-parameters.
             "sgd_minibatch_size": 256,
             "clip_param": 0.2,
             "entropy_coeff": 0.01,
             "lambda": 0.95,
             "train_batch_size": 1000,
             #"gamma": 0.99,
             "model": {
                    # Specify our custom model from above.
                    "custom_model": "my_torch_model",
                    # Extra kwargs to be passed to your model's c'tor.
                    "custom_model_config": {},
              },
             # Settings consumed by the W&B logger: target project and run name.
             "logger_config": {
                  "wandb": {
                      "project": "IGLU-Minecraft",
                      "name":  "PPO C32 pretrained (frozen AngelaCNN + BN + MLP) (3 noops after placement) r: -0.01"
                  }
              }

        },
        loggers=[WandbLogger])



Trial name,status,loc
PPO_my_env_bff91_00000,PENDING,


2021-10-23 19:59:37,170	INFO wandb.py:170 -- Already logged into W&B.
2021-10-23 19:59:37,180	ERROR syncer.py:72 -- Log sync requires rsync to be installed.
[34m[1mwandb[0m: Currently logged in as: [33mlinar[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.5 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


[2m[36m(pid=57533)[0m 2021-10-23 19:59:40,655	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=57533)[0m 2021-10-23 19:59:40,655	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1000
  custom_metrics: {}
  date: 2021-10-23_20-00-51
  done: false
  episode_len_mean: 404.0
  episode_media: {}
  episode_reward_max: -4.089999999999957
  episode_reward_mean: -5.504999999999947
  episode_reward_min: -6.919999999999936
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8842074553171795
          entropy_coeff: 0.009999999999999998
          kl: 0.006456960763575302
          policy_loss: -0.11997320271200604
          total_loss: -0.08765164100461537
          vf_explained_var: -0.333754301071167
          vf_loss: 0.05987224327141626
    num_agent_steps_sampled: 1000
    num_agent_steps_trained: 1000
    num_steps_sampled: 1000
    num_steps_trained: 1000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1,65.1932,1000,-5.505,-4.09,-6.92,404


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 2000
  custom_metrics: {}
  date: 2021-10-23_20-01-16
  done: false
  episode_len_mean: 397.0
  episode_media: {}
  episode_reward_max: -3.8099999999999627
  episode_reward_mean: -6.624999999999953
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 4
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8800373209847345
          entropy_coeff: 0.009999999999999998
          kl: 0.005599747898264063
          policy_loss: 0.003479890525341034
          total_loss: 0.2959221008751127
          vf_explained_var: 0.006927939131855965
          vf_loss: 0.32012263719613354
    num_agent_steps_sampled: 2000
    num_agent_steps_trained: 2000
    num_steps_sampled: 2000
    num_steps_trained: 2000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,2,90.0023,2000,-6.625,-3.81,-11.68,397


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 3000
  custom_metrics: {}
  date: 2021-10-23_20-01-40
  done: false
  episode_len_mean: 397.14285714285717
  episode_media: {}
  episode_reward_max: -3.7699999999999636
  episode_reward_mean: -6.458571428571379
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 7
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8747480975257025
          entropy_coeff: 0.009999999999999998
          kl: 0.008022750978772012
          policy_loss: 0.055439456303914385
          total_loss: 0.05153674888941977
          vf_explained_var: 0.032174769788980484
          vf_loss: 0.023240223651130994
    num_agent_steps_sampled: 3000
    num_agent_steps_trained: 3000
    num_steps_sampled: 3000
    num_steps_trained: 3000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,3,113.76,3000,-6.45857,-3.77,-11.68,397.143


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-10-23_20-02-04
  done: false
  episode_len_mean: 399.9
  episode_media: {}
  episode_reward_max: -3.7699999999999636
  episode_reward_mean: -5.739999999999953
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 10
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8688815858629013
          entropy_coeff: 0.009999999999999998
          kl: 0.010327369973270429
          policy_loss: -0.029025505565934712
          total_loss: -0.03977542896237638
          vf_explained_var: -0.1669977456331253
          vf_loss: 0.015873416927125718
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,4,137.74,4000,-5.74,-3.77,-11.68,399.9


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 5000
  custom_metrics: {}
  date: 2021-10-23_20-02-27
  done: false
  episode_len_mean: 399.3333333333333
  episode_media: {}
  episode_reward_max: -3.7699999999999636
  episode_reward_mean: -5.444166666666621
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 12
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8554247034920586
          entropy_coeff: 0.009999999999999998
          kl: 0.007111638959031552
          policy_loss: 0.024771504352490106
          total_loss: 0.005908129985133807
          vf_explained_var: 0.06365722417831421
          vf_loss: 0.008268542657606303
    num_agent_steps_sampled: 5000
    num_agent_steps_trained: 5000
    num_steps_sampled: 5000
    num_steps_trained: 5000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,5,160.53,5000,-5.44417,-3.77,-11.68,399.333


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 6000
  custom_metrics: {}
  date: 2021-10-23_20-02-49
  done: false
  episode_len_mean: 401.2142857142857
  episode_media: {}
  episode_reward_max: -3.7699999999999636
  episode_reward_mean: -5.25571428571424
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 14
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8506789922714235
          entropy_coeff: 0.009999999999999998
          kl: 0.013012807875768178
          policy_loss: -0.07903324373894267
          total_loss: -0.08541800785395834
          vf_explained_var: -0.28562963008880615
          vf_loss: 0.019519462374349435
    num_agent_steps_sampled: 6000
    num_agent_steps_trained: 6000
    num_steps_sampled: 6000
    num_steps_trained: 6000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,6,182.897,6000,-5.25571,-3.77,-11.68,401.214


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 7000
  custom_metrics: {}
  date: 2021-10-23_20-03-11
  done: false
  episode_len_mean: 403.1764705882353
  episode_media: {}
  episode_reward_max: -3.7699999999999636
  episode_reward_mean: -5.055882352941132
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 17
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.829311630460951
          entropy_coeff: 0.009999999999999998
          kl: 0.016075911629013718
          policy_loss: 0.03538597031599945
          total_loss: 0.026458122167322372
          vf_explained_var: -0.02928508073091507
          vf_loss: 0.016150081610410576
    num_agent_steps_sampled: 7000
    num_agent_steps_trained: 7000
    num_steps_sampled: 7000
    num_steps_trained: 7000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,7,204.904,7000,-5.05588,-3.77,-11.68,403.176


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-10-23_20-03-34
  done: false
  episode_len_mean: 401.63157894736844
  episode_media: {}
  episode_reward_max: -3.7399999999999642
  episode_reward_mean: -4.9326315789473245
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 19
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8158777554829917
          entropy_coeff: 0.009999999999999998
          kl: 0.017486249288130137
          policy_loss: -0.13547535818070172
          total_loss: -0.1481693679259883
          vf_explained_var: 0.4230184257030487
          vf_loss: 0.011967515412511096
    num_agent_steps_sampled: 8000
    num_agent_steps_trained: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,8,227.65,8000,-4.93263,-3.74,-11.68,401.632


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 9000
  custom_metrics: {}
  date: 2021-10-23_20-03-57
  done: false
  episode_len_mean: 402.3181818181818
  episode_media: {}
  episode_reward_max: -3.7399999999999642
  episode_reward_mean: -4.814545454545411
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 22
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.82550437980228
          entropy_coeff: 0.009999999999999998
          kl: 0.018702087230566475
          policy_loss: -0.04859256065554089
          total_loss: -0.0602311275071568
          vf_explained_var: 0.3199251592159271
          vf_loss: 0.01287605710224145
    num_agent_steps_sampled: 9000
    num_agent_steps_trained: 9000
    num_steps_sampled: 9000
    num_steps_trained: 9000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,9,250.92,9000,-4.81455,-3.74,-11.68,402.318


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 10000
  custom_metrics: {}
  date: 2021-10-23_20-04-21
  done: false
  episode_len_mean: 402.8333333333333
  episode_media: {}
  episode_reward_max: -3.7399999999999642
  episode_reward_mean: -4.753749999999957
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 24
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.8173779275682236
          entropy_coeff: 0.009999999999999998
          kl: 0.01386509380924205
          policy_loss: -0.11431679601470629
          total_loss: -0.12157516140076849
          vf_explained_var: 0.00039507614565081894
          vf_loss: 0.01814239382122954
    num_agent_steps_sampled: 10000
    num_agent_steps_trained: 10000
    num_steps_sampled: 10000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,10,274.116,10000,-4.75375,-3.74,-11.68,402.833


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 11000
  custom_metrics: {}
  date: 2021-10-23_20-04-43
  done: false
  episode_len_mean: 401.77777777777777
  episode_media: {}
  episode_reward_max: -3.7399999999999642
  episode_reward_mean: -4.662592592592549
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 27
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19999999999999996
          cur_lr: 5.000000000000001e-05
          entropy: 2.786292725139194
          entropy_coeff: 0.009999999999999998
          kl: 0.020304013095382187
          policy_loss: -0.04688897505402565
          total_loss: -0.05589024470084243
          vf_explained_var: 0.397152841091156
          vf_loss: 0.01480085596235262
    num_agent_steps_sampled: 11000
    num_agent_steps_trained: 11000
    num_steps_sampled: 11000
    num_steps_trained: 1100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,11,296.447,11000,-4.66259,-3.74,-11.68,401.778




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-10-23_20-05-24
  done: false
  episode_len_mean: 398.5
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.565333333333291
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 30
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.7734540038638644
          entropy_coeff: 0.009999999999999998
          kl: 0.01793869688279099
          policy_loss: 0.1280278537215458
          total_loss: 0.1166937575985988
          vf_explained_var: 0.8596627712249756
          vf_loss: 0.011018835618678067
    num_agent_steps_sampled: 12000
    num_agent_steps_trained: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,12,337.079,12000,-4.56533,-3.4,-11.68,398.5


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 13000
  custom_metrics: {}
  date: 2021-10-23_20-05-51
  done: false
  episode_len_mean: 398.625
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.530312499999957
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 32
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.75128583378262
          entropy_coeff: 0.009999999999999998
          kl: 0.01984818910455538
          policy_loss: -0.042469983796278636
          total_loss: -0.05211790717310376
          vf_explained_var: 0.5299693942070007
          vf_loss: 0.011910473068969116
    num_agent_steps_sampled: 13000
    num_agent_steps_trained: 13000
    num_steps_sampled: 13000
    num_steps_trained: 13000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,13,364.272,13000,-4.53031,-3.4,-11.68,398.625


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 14000
  custom_metrics: {}
  date: 2021-10-23_20-06-14
  done: false
  episode_len_mean: 400.6470588235294
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.518529411764663
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 34
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3
          cur_lr: 5.000000000000001e-05
          entropy: 2.743181520038181
          entropy_coeff: 0.009999999999999998
          kl: 0.020829434216850334
          policy_loss: -0.10784101382725769
          total_loss: -0.11990375423596965
          vf_explained_var: 0.9059972763061523
          vf_loss: 0.009120243673937187
    num_agent_steps_sampled: 14000
    num_agent_steps_trained: 14000
    num_steps_sampled: 14000
    num_steps_trained: 14000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,14,387.85,14000,-4.51853,-3.4,-11.68,400.647


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 15000
  custom_metrics: {}
  date: 2021-10-23_20-06-38
  done: false
  episode_len_mean: 402.7567567567568
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.498108108108065
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 37
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.706018426683214
          entropy_coeff: 0.009999999999999998
          kl: 0.019200654974621204
          policy_loss: 0.05582734752032492
          total_loss: 0.043174301832914354
          vf_explained_var: 0.9019663333892822
          vf_loss: 0.00576684343525105
    num_agent_steps_sampled: 15000
    num_agent_steps_trained: 15000
    num_steps_sampled: 15000
    num_steps_trained: 15000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,15,411.224,15000,-4.49811,-3.4,-11.68,402.757


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-10-23_20-07-02
  done: false
  episode_len_mean: 403.4102564102564
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.480512820512777
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 39
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.7037156290478177
          entropy_coeff: 0.009999999999999998
          kl: 0.01580256417704125
          policy_loss: -0.06819629263546732
          total_loss: -0.08233904788891475
          vf_explained_var: 0.8372471332550049
          vf_loss: 0.005783245029548804
    num_agent_steps_sampled: 16000
    num_agent_steps_trained: 16000
    num_steps_sampled: 16000
    num_steps_trained: 1600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,16,434.981,16000,-4.48051,-3.4,-11.68,403.41


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 17000
  custom_metrics: {}
  date: 2021-10-23_20-07-25
  done: false
  episode_len_mean: 404.57142857142856
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.460238095238052
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 42
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4500000000000002
          cur_lr: 5.000000000000001e-05
          entropy: 2.6714555422465005
          entropy_coeff: 0.009999999999999998
          kl: 0.026431538061135344
          policy_loss: -0.07814786562489139
          total_loss: -0.08374368511140347
          vf_explained_var: 0.519277811050415
          vf_loss: 0.009224544421562718
    num_agent_steps_sampled: 17000
    num_agent_steps_trained: 17000
    num_steps_sampled: 17000
    num_steps_trained: 170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,17,458.61,17000,-4.46024,-3.4,-11.68,404.571


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 18000
  custom_metrics: {}
  date: 2021-10-23_20-07-48
  done: false
  episode_len_mean: 406.04545454545456
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.456136363636319
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 44
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6749999999999998
          cur_lr: 5.000000000000001e-05
          entropy: 2.6299474398295084
          entropy_coeff: 0.009999999999999998
          kl: 0.03953350732723872
          policy_loss: 0.009371145649088754
          total_loss: 0.01672259925140275
          vf_explained_var: 0.47171080112457275
          vf_loss: 0.006965810370941957
    num_agent_steps_sampled: 18000
    num_agent_steps_trained: 18000
    num_steps_sampled: 18000
    num_steps_trained: 180

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,18,481.459,18000,-4.45614,-3.4,-11.68,406.045


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 19000
  custom_metrics: {}
  date: 2021-10-23_20-08-11
  done: false
  episode_len_mean: 407.04347826086956
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.448913043478217
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 46
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0125
          cur_lr: 5.000000000000001e-05
          entropy: 2.654770927959018
          entropy_coeff: 0.009999999999999998
          kl: 0.022958168615069656
          policy_loss: -0.05469299712114864
          total_loss: -0.05065683234069082
          vf_explained_var: 0.3578365445137024
          vf_loss: 0.007338726743020945
    num_agent_steps_sampled: 19000
    num_agent_steps_trained: 19000
    num_steps_sampled: 19000
    num_steps_trained: 19000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,19,504.587,19000,-4.44891,-3.4,-11.68,407.043


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-10-23_20-08-35
  done: false
  episode_len_mean: 407.57142857142856
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.431020408163222
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 49
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5187500000000007
          cur_lr: 5.000000000000001e-05
          entropy: 2.6616868416468304
          entropy_coeff: 0.009999999999999998
          kl: 0.022810885154952606
          policy_loss: 0.03870727655788263
          total_loss: 0.05374175194236967
          vf_explained_var: 0.20586484670639038
          vf_loss: 0.00700731611965845
    num_agent_steps_sampled: 20000
    num_agent_steps_trained: 20000
    num_steps_sampled: 20000
    num_steps_trained: 2000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,20,528.244,20000,-4.43102,-3.4,-11.68,407.571


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 21000
  custom_metrics: {}
  date: 2021-10-23_20-09-01
  done: false
  episode_len_mean: 407.2156862745098
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.413529411764663
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 51
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2781249999999993
          cur_lr: 5.000000000000001e-05
          entropy: 2.6570737494362726
          entropy_coeff: 0.009999999999999998
          kl: 0.023217284710456927
          policy_loss: -0.009354086551401351
          total_loss: 0.02482905379599995
          vf_explained_var: 0.5029480457305908
          vf_loss: 0.007862001851511499
    num_agent_steps_sampled: 21000
    num_agent_steps_trained: 21000
    num_steps_sampled: 21000
    num_steps_trained: 210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,21,553.852,21000,-4.41353,-3.4,-11.68,407.216


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 22000
  custom_metrics: {}
  date: 2021-10-23_20-09-24
  done: false
  episode_len_mean: 407.0566037735849
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.399056603773541
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 53
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4171875
          cur_lr: 5.000000000000001e-05
          entropy: 2.584075869454278
          entropy_coeff: 0.009999999999999998
          kl: 0.009329831398402651
          policy_loss: -0.1592173079235686
          total_loss: -0.1465475398219294
          vf_explained_var: 0.28839996457099915
          vf_loss: 0.006628743458228807
    num_agent_steps_sampled: 22000
    num_agent_steps_trained: 22000
    num_steps_sampled: 22000
    num_steps_trained: 22000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,22,577.515,22000,-4.39906,-3.4,-11.68,407.057


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 23000
  custom_metrics: {}
  date: 2021-10-23_20-09-46
  done: false
  episode_len_mean: 409.57142857142856
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.406607142857099
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 56
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4171875
          cur_lr: 5.000000000000001e-05
          entropy: 2.5763857709036935
          entropy_coeff: 0.009999999999999998
          kl: 0.016838987140605846
          policy_loss: -0.07905039509965314
          total_loss: -0.033403235715296534
          vf_explained_var: 0.3724333345890045
          vf_loss: 0.013869040330044097
    num_agent_steps_sampled: 23000
    num_agent_steps_trained: 23000
    num_steps_sampled: 23000
    num_steps_trained: 23000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,23,599.396,23000,-4.40661,-3.4,-11.68,409.571


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-10-23_20-10-08
  done: false
  episode_len_mean: 410.7758620689655
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.407931034482714
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 58
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4171875
          cur_lr: 5.000000000000001e-05
          entropy: 2.6386552943123713
          entropy_coeff: 0.009999999999999998
          kl: 0.022456646213928858
          policy_loss: 0.04803856992059284
          total_loss: 0.10827007446851995
          vf_explained_var: -0.19946573674678802
          vf_loss: 0.009879484749399125
    num_agent_steps_sampled: 24000
    num_agent_steps_trained: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,24,621.604,24000,-4.40793,-3.4,-11.68,410.776




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 25000
  custom_metrics: {}
  date: 2021-10-23_20-10-50
  done: false
  episode_len_mean: 411.96666666666664
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.40983333333329
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 60
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.125781250000001
          cur_lr: 5.000000000000001e-05
          entropy: 2.6023688210381404
          entropy_coeff: 0.009999999999999998
          kl: 0.04645030924843417
          policy_loss: -0.05534761434213983
          total_loss: 0.17626828799645106
          vf_explained_var: 0.5464743971824646
          vf_loss: 0.01954547544527385
    num_agent_steps_sampled: 25000
    num_agent_steps_trained: 25000
    num_steps_sampled: 25000
    num_steps_trained: 25000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,25,663.487,25000,-4.40983,-3.4,-11.68,411.967


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 26000
  custom_metrics: {}
  date: 2021-10-23_20-11-15
  done: false
  episode_len_mean: 413.46774193548384
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.415483870967698
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 62
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.688671874999997
          cur_lr: 5.000000000000001e-05
          entropy: 2.4755083905325996
          entropy_coeff: 0.009999999999999998
          kl: 0.03255319442186091
          policy_loss: -0.014060274635752042
          total_loss: 0.22827990651130675
          vf_explained_var: 0.5377042293548584
          vf_loss: 0.01680443056134714
    num_agent_steps_sampled: 26000
    num_agent_steps_trained: 26000
    num_steps_sampled: 26000
    num_steps_trained: 26000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,26,687.962,26000,-4.41548,-3.4,-11.68,413.468


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 27000
  custom_metrics: {}
  date: 2021-10-23_20-11-39
  done: false
  episode_len_mean: 415.40625
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.426093749999955
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 64
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 11.533007812500003
          cur_lr: 5.000000000000001e-05
          entropy: 2.3386745903227064
          entropy_coeff: 0.009999999999999998
          kl: 0.019459899401085244
          policy_loss: -0.05686958912346098
          total_loss: 0.1591819833136267
          vf_explained_var: 0.49512550234794617
          vf_loss: 0.015007150462932058
    num_agent_steps_sampled: 27000
    num_agent_steps_trained: 27000
    num_steps_sampled: 27000
    num_steps_trained: 27000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,27,711.992,27000,-4.42609,-3.4,-11.68,415.406


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-10-23_20-12-04
  done: false
  episode_len_mean: 416.7014925373134
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.426865671641746
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 67
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 11.533007812500003
          cur_lr: 5.000000000000001e-05
          entropy: 2.464505002233717
          entropy_coeff: 0.009999999999999998
          kl: 0.0190722800222122
          policy_loss: 0.05019524155391587
          total_loss: 0.2646505886481868
          vf_explained_var: 0.04099016264081001
          vf_loss: 0.019139638667305312
    num_agent_steps_sampled: 28000
    num_agent_steps_trained: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,28,737.357,28000,-4.42687,-3.4,-11.68,416.701


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 29000
  custom_metrics: {}
  date: 2021-10-23_20-12-28
  done: false
  episode_len_mean: 417.4927536231884
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.42724637681155
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 69
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 11.533007812500003
          cur_lr: 5.000000000000001e-05
          entropy: 2.399299489127265
          entropy_coeff: 0.009999999999999998
          kl: 0.03961188937019971
          policy_loss: -0.018989456983076202
          total_loss: 0.428685810458329
          vf_explained_var: 0.46459662914276123
          vf_loss: 0.014824038568056292
    num_agent_steps_sampled: 29000
    num_agent_steps_trained: 29000
    num_steps_sampled: 29000
    num_steps_trained: 29000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,29,761.475,29000,-4.42725,-3.4,-11.68,417.493


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 30000
  custom_metrics: {}
  date: 2021-10-23_20-12-50
  done: false
  episode_len_mean: 418.98591549295776
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.435070422535167
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 71
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 17.299511718750004
          cur_lr: 5.000000000000001e-05
          entropy: 2.403047553698222
          entropy_coeff: 0.009999999999999998
          kl: 0.030579253239557146
          policy_loss: -0.09753949170311292
          total_loss: 0.430919541998042
          vf_explained_var: 0.23082804679870605
          vf_loss: 0.0234833763136218
    num_agent_steps_sampled: 30000
    num_agent_steps_trained: 30000
    num_steps_sampled: 30000
    num_steps_trained: 30000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,30,783.12,30000,-4.43507,-3.4,-11.68,418.986


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 31000
  custom_metrics: {}
  date: 2021-10-23_20-13-10
  done: false
  episode_len_mean: 421.3150684931507
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.451643835616392
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 73
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 25.94926757812499
          cur_lr: 5.000000000000001e-05
          entropy: 2.021889411078559
          entropy_coeff: 0.009999999999999998
          kl: 0.05668485653069284
          policy_loss: 0.0025978359083334607
          total_loss: 1.4864825838969813
          vf_explained_var: -0.14396879076957703
          vf_loss: 0.033173121615416475
    num_agent_steps_sampled: 31000
    num_agent_steps_trained: 31000
    num_steps_sampled: 31000
    num_steps_trained: 31000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,31,803.275,31000,-4.45164,-3.4,-11.68,421.315


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-10-23_20-13-34
  done: false
  episode_len_mean: 422.06666666666666
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.452799999999954
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 75
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 38.92390136718751
          cur_lr: 5.000000000000001e-05
          entropy: 2.2600860595703125
          entropy_coeff: 0.009999999999999998
          kl: 0.03384022143048545
          policy_loss: -0.007171207004123264
          total_loss: 1.309688728633854
          vf_explained_var: 0.0215865857899189
          vf_loss: 0.022267389856278897
    num_agent_steps_sampled: 32000
    num_agent_steps_trained: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,32,826.79,32000,-4.4528,-3.4,-11.68,422.067


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 33000
  custom_metrics: {}
  date: 2021-10-23_20-13-57
  done: false
  episode_len_mean: 422.0128205128205
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.443333333333287
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 78
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 58.38585205078125
          cur_lr: 5.000000000000001e-05
          entropy: 2.1660860962337916
          entropy_coeff: 0.009999999999999998
          kl: 0.01776850727490253
          policy_loss: 0.02025014220012559
          total_loss: 1.045506219069163
          vf_explained_var: 0.42931103706359863
          vf_loss: 0.009487491220028864
    num_agent_steps_sampled: 33000
    num_agent_steps_trained: 33000
    num_steps_sampled: 33000
    num_steps_trained: 33000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,33,850.171,33000,-4.44333,-3.4,-11.68,422.013


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 34000
  custom_metrics: {}
  date: 2021-10-23_20-14-21
  done: false
  episode_len_mean: 422.325
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.440874999999953
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 80
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 58.38585205078125
          cur_lr: 5.000000000000001e-05
          entropy: 2.2327414247724744
          entropy_coeff: 0.009999999999999998
          kl: 0.0276106057740334
          policy_loss: 0.07607662710878584
          total_loss: 1.6787331971857282
          vf_explained_var: -0.3340350389480591
          vf_loss: 0.012915202043950557
    num_agent_steps_sampled: 34000
    num_agent_steps_trained: 34000
    num_steps_sampled: 34000
    num_steps_trained: 34000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,34,873.682,34000,-4.44087,-3.4,-11.68,422.325


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 35000
  custom_metrics: {}
  date: 2021-10-23_20-14-45
  done: false
  episode_len_mean: 422.6707317073171
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.439024390243855
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 82
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 87.57877807617187
          cur_lr: 5.000000000000001e-05
          entropy: 2.106811857223511
          entropy_coeff: 0.009999999999999998
          kl: 0.017248582291520303
          policy_loss: -0.04370667255587048
          total_loss: 1.4665036098824606
          vf_explained_var: -0.3641115725040436
          vf_loss: 0.02066859494273861
    num_agent_steps_sampled: 35000
    num_agent_steps_trained: 35000
    num_steps_sampled: 35000
    num_steps_trained: 35000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,35,897.479,35000,-4.43902,-3.4,-11.68,422.671


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 36000
  custom_metrics: {}
  date: 2021-10-23_20-15-10
  done: false
  episode_len_mean: 421.2823529411765
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.417647058823483
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 85
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 87.57877807617187
          cur_lr: 5.000000000000001e-05
          entropy: 1.9483228908644783
          entropy_coeff: 0.009999999999999998
          kl: 0.032501290252225264
          policy_loss: 0.0365930099454191
          total_loss: 2.880249972641468
          vf_explained_var: -0.13482695817947388
          vf_loss: 0.016716802968747085
    num_agent_steps_sampled: 36000
    num_agent_steps_trained: 36000
    num_steps_sampled: 36000
    num_steps_trained: 36000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,36,923.161,36000,-4.41765,-3.4,-11.68,421.282


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 37000
  custom_metrics: {}
  date: 2021-10-23_20-15-34
  done: false
  episode_len_mean: 421.01149425287355
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.410229885057424
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 87
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 131.36816711425786
          cur_lr: 5.000000000000001e-05
          entropy: 1.9943492306603325
          entropy_coeff: 0.009999999999999998
          kl: 0.02776631402472655
          policy_loss: -0.04249158435397678
          total_loss: 3.6028643548488617
          vf_explained_var: -0.404125839471817
          vf_loss: 0.01768974484875798
    num_agent_steps_sampled: 37000
    num_agent_steps_trained: 37000
    num_steps_sampled: 37000
    num_steps_trained: 37000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,37,947.205,37000,-4.41023,-3.4,-11.68,421.011




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 38000
  custom_metrics: {}
  date: 2021-10-23_20-16-14
  done: false
  episode_len_mean: 419.9111111111111
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.39255555555551
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 90
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 197.05225067138667
          cur_lr: 5.000000000000001e-05
          entropy: 1.9590930448638069
          entropy_coeff: 0.009999999999999998
          kl: 0.017586263442515498
          policy_loss: 0.12422967838744323
          total_loss: 3.58620844648944
          vf_explained_var: -0.060028161853551865
          vf_loss: 0.016156961541208957
    num_agent_steps_sampled: 38000
    num_agent_steps_trained: 38000
    num_steps_sampled: 38000
    num_steps_trained: 38000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,38,986.746,38000,-4.39256,-3.4,-11.68,419.911


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 39000
  custom_metrics: {}
  date: 2021-10-23_20-16-41
  done: false
  episode_len_mean: 418.83870967741933
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.3755913978494165
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 93
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 197.05225067138667
          cur_lr: 5.000000000000001e-05
          entropy: 1.9944972647560968
          entropy_coeff: 0.009999999999999998
          kl: 0.024838114767852755
          policy_loss: 0.04464508460627662
          total_loss: 4.9403895537058515
          vf_explained_var: -0.3404256999492645
          vf_loss: 0.021283130151116186
    num_agent_steps_sampled: 39000
    num_agent_steps_trained: 39000
    num_steps_sampled: 39000
    num_steps_trained: 390

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,39,1013.91,39000,-4.37559,-3.4,-11.68,418.839


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 40000
  custom_metrics: {}
  date: 2021-10-23_20-17-07
  done: false
  episode_len_mean: 418.0210526315789
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.363473684210481
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 95
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 295.57837600708007
          cur_lr: 5.000000000000001e-05
          entropy: 1.7502587980694242
          entropy_coeff: 0.009999999999999998
          kl: 0.007739881123416126
          policy_loss: -0.10172492480940289
          total_loss: 2.190870527426402
          vf_explained_var: -0.525911808013916
          vf_loss: 0.022356590597579877
    num_agent_steps_sampled: 40000
    num_agent_steps_trained: 40000
    num_steps_sampled: 40000
    num_steps_trained: 40000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,40,1039.92,40000,-4.36347,-3.4,-11.68,418.021


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 41000
  custom_metrics: {}
  date: 2021-10-23_20-17-33
  done: false
  episode_len_mean: 417.0
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.347653061224445
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 3
  episodes_total: 98
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 295.57837600708007
          cur_lr: 5.000000000000001e-05
          entropy: 1.912933193312751
          entropy_coeff: 0.009999999999999998
          kl: 0.014705765239583949
          policy_loss: 0.08060076956947644
          total_loss: 4.4306088328361515
          vf_explained_var: -0.11413024365901947
          vf_loss: 0.022431273634235065
    num_agent_steps_sampled: 41000
    num_agent_steps_trained: 41000
    num_steps_sampled: 41000
    num_steps_trained: 41000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,41,1065.44,41000,-4.34765,-3.4,-11.68,417


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 42000
  custom_metrics: {}
  date: 2021-10-23_20-17-55
  done: false
  episode_len_mean: 417.35
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.347599999999955
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 100
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 295.57837600708007
          cur_lr: 5.000000000000001e-05
          entropy: 1.7575964993900723
          entropy_coeff: 0.009999999999999998
          kl: 0.027581527932650514
          policy_loss: 0.003627354817257987
          total_loss: 8.162745699617599
          vf_explained_var: -0.34815630316734314
          vf_loss: 0.024191235189533068
    num_agent_steps_sampled: 42000
    num_agent_steps_trained: 42000
    num_steps_sampled: 42000
    num_steps_trained: 42000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,42,1088.03,42000,-4.3476,-3.4,-11.68,417.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 43000
  custom_metrics: {}
  date: 2021-10-23_20-18-18
  done: false
  episode_len_mean: 417.91
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.323899999999955
  episode_reward_min: -11.679999999999955
  episodes_this_iter: 2
  episodes_total: 102
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 443.3675640106203
          cur_lr: 5.000000000000001e-05
          entropy: 1.5716770940356783
          entropy_coeff: 0.009999999999999998
          kl: 0.026576801591242354
          policy_loss: -0.05253558804591497
          total_loss: 11.736757242679596
          vf_explained_var: 0.4360876679420471
          vf_loss: 0.02171802070612709
    num_agent_steps_sampled: 43000
    num_agent_steps_trained: 43000
    num_steps_sampled: 43000
    num_steps_trained: 43000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,43,1110.66,43000,-4.3239,-3.4,-11.68,417.91


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 44000
  custom_metrics: {}
  date: 2021-10-23_20-18-46
  done: false
  episode_len_mean: 417.65
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.176499999999956
  episode_reward_min: -5.159999999999934
  episodes_this_iter: 3
  episodes_total: 105
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 665.05134601593
          cur_lr: 5.000000000000001e-05
          entropy: 1.2825979722870722
          entropy_coeff: 0.009999999999999998
          kl: 0.04482407752527959
          policy_loss: -0.0163387570116255
          total_loss: 29.8158324347602
          vf_explained_var: -0.3094426989555359
          vf_loss: 0.034684971233622894
    num_agent_steps_sampled: 44000
    num_agent_steps_trained: 44000
    num_steps_sampled: 44000
    num_steps_trained: 44000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,44,1138.26,44000,-4.1765,-3.4,-5.16,417.65


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 45000
  custom_metrics: {}
  date: 2021-10-23_20-19-07
  done: false
  episode_len_mean: 419.27
  episode_media: {}
  episode_reward_max: -3.3999999999999715
  episode_reward_mean: -4.192699999999955
  episode_reward_min: -5.559999999999926
  episodes_this_iter: 2
  episodes_total: 107
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 997.5770190238952
          cur_lr: 5.000000000000001e-05
          entropy: 1.1761284377839831
          entropy_coeff: 0.009999999999999998
          kl: 0.06173331384650535
          policy_loss: -0.0837941007481681
          total_loss: 61.5298419740465
          vf_explained_var: -0.26189136505126953
          vf_loss: 0.041662265588012004
    num_agent_steps_sampled: 45000
    num_agent_steps_trained: 45000
    num_steps_sampled: 45000
    num_steps_trained: 45000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,45,1160.08,45000,-4.1927,-3.4,-5.56,419.27


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 46000
  custom_metrics: {}
  date: 2021-10-23_20-19-31
  done: false
  episode_len_mean: 419.35
  episode_media: {}
  episode_reward_max: -3.369999999999972
  episode_reward_mean: -4.193499999999954
  episode_reward_min: -5.559999999999926
  episodes_this_iter: 3
  episodes_total: 110
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1496.3655285358436
          cur_lr: 5.000000000000001e-05
          entropy: 1.0837614324357774
          entropy_coeff: 0.009999999999999998
          kl: 0.028957781692345936
          policy_loss: -0.007379058168994056
          total_loss: 43.34501268598768
          vf_explained_var: 0.06669425219297409
          vf_loss: 0.03180543678916163
    num_agent_steps_sampled: 46000
    num_agent_steps_trained: 46000
    num_steps_sampled: 46000
    num_steps_trained: 46000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,46,1184.04,46000,-4.1935,-3.37,-5.56,419.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 47000
  custom_metrics: {}
  date: 2021-10-23_20-19-56
  done: false
  episode_len_mean: 419.05
  episode_media: {}
  episode_reward_max: -3.319999999999973
  episode_reward_mean: -4.190499999999955
  episode_reward_min: -5.559999999999926
  episodes_this_iter: 2
  episodes_total: 112
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2244.5482928037636
          cur_lr: 5.000000000000001e-05
          entropy: 0.9480428106255001
          entropy_coeff: 0.009999999999999998
          kl: 0.023611365232823624
          policy_loss: -0.11248424483670129
          total_loss: 52.90119760831197
          vf_explained_var: 0.28409573435783386
          vf_loss: 0.026311207024587526
    num_agent_steps_sampled: 47000
    num_agent_steps_trained: 47000
    num_steps_sampled: 47000
    num_steps_trained: 47000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,47,1208.77,47000,-4.1905,-3.32,-5.56,419.05


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 48000
  custom_metrics: {}
  date: 2021-10-23_20-20-20
  done: false
  episode_len_mean: 419.19
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -4.191899999999955
  episode_reward_min: -6.009999999999916
  episodes_this_iter: 3
  episodes_total: 115
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3366.8224392056463
          cur_lr: 5.000000000000001e-05
          entropy: 1.340146583980984
          entropy_coeff: 0.009999999999999998
          kl: 0.34190289185175465
          policy_loss: -0.02603119577591618
          total_loss: 1151.1917874760097
          vf_explained_var: -0.13462546467781067
          vf_loss: 0.10486887024922503
    num_agent_steps_sampled: 48000
    num_agent_steps_trained: 48000
    num_steps_sampled: 48000
    num_steps_trained: 48000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,48,1232.76,48000,-4.1919,-3.21,-6.01,419.19


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 49000
  custom_metrics: {}
  date: 2021-10-23_20-20-39
  done: false
  episode_len_mean: 421.39
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -4.2138999999999545
  episode_reward_min: -6.31999999999991
  episodes_this_iter: 2
  episodes_total: 117
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5050.233658808471
          cur_lr: 5.000000000000001e-05
          entropy: 0.7440440396467844
          entropy_coeff: 0.009999999999999998
          kl: 0.026890729357384974
          policy_loss: 0.16635718336328864
          total_loss: 136.02480710347493
          vf_explained_var: -0.06673011928796768
          vf_loss: 0.06141745079722669
    num_agent_steps_sampled: 49000
    num_agent_steps_trained: 49000
    num_steps_sampled: 49000
    num_steps_trained: 49000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,49,1251.32,49000,-4.2139,-3.21,-6.32,421.39




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 50000
  custom_metrics: {}
  date: 2021-10-23_20-21-25
  done: false
  episode_len_mean: 418.55
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.185499999999955
  episode_reward_min: -6.31999999999991
  episodes_this_iter: 3
  episodes_total: 120
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7575.350488212702
          cur_lr: 5.000000000000001e-05
          entropy: 0.8473287628756629
          entropy_coeff: 0.009999999999999998
          kl: 0.01950293249036703
          policy_loss: 0.011574512782196204
          total_loss: 147.78845609029133
          vf_explained_var: -0.18231628835201263
          vf_loss: 0.04380385225845708
    num_agent_steps_sampled: 50000
    num_agent_steps_trained: 50000
    num_steps_sampled: 50000
    num_steps_trained: 50000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,50,1297.32,50000,-4.1855,-2.51,-6.32,418.55


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 51000
  custom_metrics: {}
  date: 2021-10-23_20-21-51
  done: false
  episode_len_mean: 419.36
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.193599999999955
  episode_reward_min: -6.31999999999991
  episodes_this_iter: 2
  episodes_total: 122
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7575.350488212702
          cur_lr: 5.000000000000001e-05
          entropy: 0.5835026737716463
          entropy_coeff: 0.009999999999999998
          kl: 0.2159579365307258
          policy_loss: 0.06746898525291019
          total_loss: 1636.0853349473741
          vf_explained_var: -0.6806487441062927
          vf_loss: 0.06672794801286525
    num_agent_steps_sampled: 51000
    num_agent_steps_trained: 51000
    num_steps_sampled: 51000
    num_steps_trained: 51000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,51,1323.51,51000,-4.1936,-2.51,-6.32,419.36


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 52000
  custom_metrics: {}
  date: 2021-10-23_20-22-14
  done: false
  episode_len_mean: 417.27
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.172699999999955
  episode_reward_min: -6.31999999999991
  episodes_this_iter: 2
  episodes_total: 124
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 11363.02573231906
          cur_lr: 5.000000000000001e-05
          entropy: 0.8816458029879464
          entropy_coeff: 0.009999999999999998
          kl: 0.04762701058967246
          policy_loss: -0.10723443768090672
          total_loss: 541.1074540032281
          vf_explained_var: -0.11949418485164642
          vf_loss: 0.03656908658643564
    num_agent_steps_sampled: 52000
    num_agent_steps_trained: 52000
    num_steps_sampled: 52000
    num_steps_trained: 52000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,52,1346.79,52000,-4.1727,-2.51,-6.32,417.27


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 53000
  custom_metrics: {}
  date: 2021-10-23_20-22-33
  done: false
  episode_len_mean: 419.25
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.192499999999955
  episode_reward_min: -6.629999999999903
  episodes_this_iter: 2
  episodes_total: 126
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 17044.53859847859
          cur_lr: 5.000000000000001e-05
          entropy: 0.6942214644617505
          entropy_coeff: 0.009999999999999998
          kl: 0.015442090194361905
          policy_loss: -0.2401251756482654
          total_loss: 263.0056202782525
          vf_explained_var: 0.8060837388038635
          vf_loss: 0.049379527806821796
    num_agent_steps_sampled: 53000
    num_agent_steps_trained: 53000
    num_steps_sampled: 53000
    num_steps_trained: 53000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,53,1365.77,53000,-4.1925,-2.51,-6.63,419.25


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 54000
  custom_metrics: {}
  date: 2021-10-23_20-22-47
  done: false
  episode_len_mean: 427.37
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.273699999999952
  episode_reward_min: -7.999999999999874
  episodes_this_iter: 2
  episodes_total: 128
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 17044.53859847859
          cur_lr: 5.000000000000001e-05
          entropy: 0.5524725764989853
          entropy_coeff: 0.009999999999999998
          kl: 0.030647364388116532
          policy_loss: 0.05182630742589633
          total_loss: 522.4474031660292
          vf_explained_var: 0.588581919670105
          vf_loss: 0.030896224619613753
    num_agent_steps_sampled: 54000
    num_agent_steps_trained: 54000
    num_steps_sampled: 54000
    num_steps_trained: 54000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,54,1379.29,54000,-4.2737,-2.51,-8,427.37


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 55000
  custom_metrics: {}
  date: 2021-10-23_20-23-00
  done: false
  episode_len_mean: 430.97
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.309699999999952
  episode_reward_min: -7.999999999999874
  episodes_this_iter: 1
  episodes_total: 129
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 25566.807897717874
          cur_lr: 5.000000000000001e-05
          entropy: 0.4119691816469034
          entropy_coeff: 0.009999999999999998
          kl: 0.015535651365967674
          policy_loss: 0.134839924176534
          total_loss: 397.40483000013563
          vf_explained_var: 0.5613910555839539
          vf_loss: 0.07708695063160526
    num_agent_steps_sampled: 55000
    num_agent_steps_trained: 55000
    num_steps_sampled: 55000
    num_steps_trained: 55000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,55,1392.92,55000,-4.3097,-2.51,-8,430.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 56000
  custom_metrics: {}
  date: 2021-10-23_20-23-15
  done: false
  episode_len_mean: 435.92
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.359199999999952
  episode_reward_min: -8.349999999999866
  episodes_this_iter: 1
  episodes_total: 130
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 25566.807897717874
          cur_lr: 5.000000000000001e-05
          entropy: 0.4522145338770416
          entropy_coeff: 0.009999999999999998
          kl: 0.011195635172124538
          policy_loss: 0.3778394639492035
          total_loss: 286.67400432162816
          vf_explained_var: 0.2525329887866974
          vf_loss: 0.0640192185011175
    num_agent_steps_sampled: 56000
    num_agent_steps_trained: 56000
    num_steps_sampled: 56000
    num_steps_trained: 56000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,56,1407.22,56000,-4.3592,-2.51,-8.35,435.92


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 57000
  custom_metrics: {}
  date: 2021-10-23_20-23-29
  done: false
  episode_len_mean: 440.86
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.408599999999949
  episode_reward_min: -8.349999999999866
  episodes_this_iter: 2
  episodes_total: 132
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 25566.807897717874
          cur_lr: 5.000000000000001e-05
          entropy: 0.7519583092795478
          entropy_coeff: 0.009999999999999998
          kl: 0.22550259415681165
          policy_loss: 0.04405987045417229
          total_loss: 5767.421723768446
          vf_explained_var: -0.9716556072235107
          vf_loss: 2.0033880766895082
    num_agent_steps_sampled: 57000
    num_agent_steps_trained: 57000
    num_steps_sampled: 57000
    num_steps_trained: 57000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,57,1421.77,57000,-4.4086,-2.51,-8.35,440.86


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 58000
  custom_metrics: {}
  date: 2021-10-23_20-23-44
  done: false
  episode_len_mean: 444.51
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.445099999999949
  episode_reward_min: -8.349999999999866
  episodes_this_iter: 1
  episodes_total: 133
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 38350.21184657683
          cur_lr: 5.000000000000001e-05
          entropy: 0.7559706979327732
          entropy_coeff: 0.009999999999999998
          kl: 0.05438374482198722
          policy_loss: -0.16245442827542622
          total_loss: 2087.6887400309247
          vf_explained_var: -0.5395477414131165
          vf_loss: 2.2306538124879203
    num_agent_steps_sampled: 58000
    num_agent_steps_trained: 58000
    num_steps_sampled: 58000
    num_steps_trained: 58000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,58,1436.46,58000,-4.4451,-2.51,-8.35,444.51


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 59000
  custom_metrics: {}
  date: 2021-10-23_20-24-00
  done: false
  episode_len_mean: 446.58
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.465799999999948
  episode_reward_min: -8.349999999999866
  episodes_this_iter: 1
  episodes_total: 134
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 57525.317769865236
          cur_lr: 5.000000000000001e-05
          entropy: 0.8470720758040746
          entropy_coeff: 0.009999999999999998
          kl: 0.7024736077628202
          policy_loss: 0.1210117383963532
          total_loss: 40410.89844563802
          vf_explained_var: 0.0904984399676323
          vf_loss: 0.7699331134557724
    num_agent_steps_sampled: 59000
    num_agent_steps_trained: 59000
    num_steps_sampled: 59000
    num_steps_trained: 59000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,59,1452.59,59000,-4.4658,-2.51,-8.35,446.58


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 60000
  custom_metrics: {}
  date: 2021-10-23_20-24-14
  done: false
  episode_len_mean: 454.23
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.542299999999948
  episode_reward_min: -8.779999999999857
  episodes_this_iter: 2
  episodes_total: 136
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 86287.97665479784
          cur_lr: 5.000000000000001e-05
          entropy: 0.490160531964567
          entropy_coeff: 0.009999999999999998
          kl: 0.15041474090216475
          policy_loss: 0.20623223731915155
          total_loss: 12979.3853386773
          vf_explained_var: -0.4940797686576843
          vf_loss: 0.20026650205254554
    num_agent_steps_sampled: 60000
    num_agent_steps_trained: 60000
    num_steps_sampled: 60000
    num_steps_trained: 60000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,60,1466.17,60000,-4.5423,-2.51,-8.78,454.23


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 61000
  custom_metrics: {}
  date: 2021-10-23_20-24-27
  done: false
  episode_len_mean: 458.92
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.589199999999946
  episode_reward_min: -9.12999999999985
  episodes_this_iter: 1
  episodes_total: 137
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 129431.96498219678
          cur_lr: 5.000000000000001e-05
          entropy: 0.29142569452524186
          entropy_coeff: 0.009999999999999998
          kl: 0.016798152417565385
          policy_loss: 0.2503403724895583
          total_loss: 2174.566022237142
          vf_explained_var: -1.0
          vf_loss: 0.10059492158082624
    num_agent_steps_sampled: 61000
    num_agent_steps_trained: 61000
    num_steps_sampled: 61000
    num_steps_trained: 61000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,61,1479.11,61000,-4.5892,-2.51,-9.13,458.92


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 62000
  custom_metrics: {}
  date: 2021-10-23_20-24-40
  done: false
  episode_len_mean: 463.99
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.639899999999946
  episode_reward_min: -9.12999999999985
  episodes_this_iter: 1
  episodes_total: 138
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 129431.96498219678
          cur_lr: 5.000000000000001e-05
          entropy: 0.222713845802678
          entropy_coeff: 0.009999999999999998
          kl: 0.023591194845115144
          policy_loss: -0.025310834248860676
          total_loss: 3063.8513258192274
          vf_explained_var: 0.21913698315620422
          vf_loss: 10.424075793557696
    num_agent_steps_sampled: 62000
    num_agent_steps_trained: 62000
    num_steps_sampled: 62000
    num_steps_trained: 62000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,62,1492.46,62000,-4.6399,-2.51,-9.13,463.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 63000
  custom_metrics: {}
  date: 2021-10-23_20-24-54
  done: false
  episode_len_mean: 468.28
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.6827999999999435
  episode_reward_min: -9.12999999999985
  episodes_this_iter: 1
  episodes_total: 139
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 194147.94747329512
          cur_lr: 5.000000000000001e-05
          entropy: 0.3044102933050858
          entropy_coeff: 0.009999999999999998
          kl: 0.05622494480016434
          policy_loss: -0.018422267503208585
          total_loss: 10916.707944234213
          vf_explained_var: -0.2882915437221527
          vf_loss: 0.7708813929270643
    num_agent_steps_sampled: 63000
    num_agent_steps_trained: 63000
    num_steps_sampled: 63000
    num_steps_trained: 63000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,63,1505.75,63000,-4.6828,-2.51,-9.13,468.28


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 64000
  custom_metrics: {}
  date: 2021-10-23_20-25-05
  done: false
  episode_len_mean: 472.85
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.728499999999943
  episode_reward_min: -9.12999999999985
  episodes_this_iter: 1
  episodes_total: 140
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 291221.92120994267
          cur_lr: 5.000000000000001e-05
          entropy: 0.12781030742658508
          entropy_coeff: 0.009999999999999998
          kl: 0.008678056828729394
          policy_loss: -0.08764219250943926
          total_loss: 2528.4110473632813
          vf_explained_var: -0.03980249539017677
          vf_loss: 1.2596902906894685
    num_agent_steps_sampled: 64000
    num_agent_steps_trained: 64000
    num_steps_sampled: 64000
    num_steps_trained: 64000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,64,1517.5,64000,-4.7285,-2.51,-9.13,472.85


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 65000
  custom_metrics: {}
  date: 2021-10-23_20-25-18
  done: false
  episode_len_mean: 478.56
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.785599999999942
  episode_reward_min: -9.849999999999834
  episodes_this_iter: 1
  episodes_total: 141
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 291221.92120994267
          cur_lr: 5.000000000000001e-05
          entropy: 0.19818338172303307
          entropy_coeff: 0.009999999999999998
          kl: 0.009906204448391994
          policy_loss: -0.08408903777599334
          total_loss: 2885.210200330946
          vf_explained_var: -0.021672438830137253
          vf_loss: 0.3925455633136961
    num_agent_steps_sampled: 65000
    num_agent_steps_trained: 65000
    num_steps_sampled: 65000
    num_steps_trained: 65000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,65,1529.88,65000,-4.7856,-2.51,-9.85,478.56


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 66000
  custom_metrics: {}
  date: 2021-10-23_20-25-30
  done: false
  episode_len_mean: 483.95
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.839499999999941
  episode_reward_min: -9.849999999999834
  episodes_this_iter: 1
  episodes_total: 142
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 291221.92120994267
          cur_lr: 5.000000000000001e-05
          entropy: 0.1042141137851609
          entropy_coeff: 0.009999999999999998
          kl: 0.00896844448418253
          policy_loss: -0.09040126734309727
          total_loss: 2612.2604817708334
          vf_explained_var: -0.049792591482400894
          vf_loss: 0.5443995808561642
    num_agent_steps_sampled: 66000
    num_agent_steps_trained: 66000
    num_steps_sampled: 66000
    num_steps_trained: 66000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,66,1542.49,66000,-4.8395,-2.51,-9.85,483.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 67000
  custom_metrics: {}
  date: 2021-10-23_20-25-43
  done: false
  episode_len_mean: 489.06
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.89059999999994
  episode_reward_min: -9.849999999999834
  episodes_this_iter: 1
  episodes_total: 143
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 291221.92120994267
          cur_lr: 5.000000000000001e-05
          entropy: 0.10213921246015363
          entropy_coeff: 0.009999999999999998
          kl: 0.004995179153370878
          policy_loss: -0.09501988987127939
          total_loss: 1454.8281915452744
          vf_explained_var: -0.017925936728715897
          vf_loss: 0.2186213503488236
    num_agent_steps_sampled: 67000
    num_agent_steps_trained: 67000
    num_steps_sampled: 67000
    num_steps_trained: 67000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,67,1554.66,67000,-4.8906,-2.51,-9.85,489.06


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 68000
  custom_metrics: {}
  date: 2021-10-23_20-25-55
  done: false
  episode_len_mean: 494.41
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.944099999999938
  episode_reward_min: -9.849999999999834
  episodes_this_iter: 1
  episodes_total: 144
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 145610.96060497133
          cur_lr: 5.000000000000001e-05
          entropy: 0.08318333418832885
          entropy_coeff: 0.009999999999999998
          kl: 0.0018038495036307722
          policy_loss: -0.08203357060750326
          total_loss: 262.7227355533176
          vf_explained_var: -0.012255721725523472
          vf_loss: 0.14535583481192588
    num_agent_steps_sampled: 68000
    num_agent_steps_trained: 68000
    num_steps_sampled: 68000
    num_steps_trained: 68000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,68,1566.62,68000,-4.9441,-2.51,-9.85,494.41


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 69000
  custom_metrics: {}
  date: 2021-10-23_20-26-07
  done: false
  episode_len_mean: 499.77
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -4.997699999999938
  episode_reward_min: -9.849999999999834
  episodes_this_iter: 1
  episodes_total: 145
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 72805.48030248567
          cur_lr: 5.000000000000001e-05
          entropy: 0.07812379577921497
          entropy_coeff: 0.009999999999999998
          kl: 0.003866950049996376
          policy_loss: -0.08780275781949361
          total_loss: 281.67363052368165
          vf_explained_var: -0.010687625035643578
          vf_loss: 0.22707957714382146
    num_agent_steps_sampled: 69000
    num_agent_steps_trained: 69000
    num_steps_sampled: 69000
    num_steps_trained: 69000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,69,1578.75,69000,-4.9977,-2.51,-9.85,499.77


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 70000
  custom_metrics: {}
  date: 2021-10-23_20-26-18
  done: false
  episode_len_mean: 505.15
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.051499999999937
  episode_reward_min: -9.849999999999834
  episodes_this_iter: 1
  episodes_total: 146
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 36402.74015124283
          cur_lr: 5.000000000000001e-05
          entropy: 0.11004660251653857
          entropy_coeff: 0.009999999999999998
          kl: 0.03877056914692124
          policy_loss: 0.0613303263982137
          total_loss: 1423.5082965426975
          vf_explained_var: -0.00531349889934063
          vf_loss: 12.093203708198335
    num_agent_steps_sampled: 70000
    num_agent_steps_trained: 70000
    num_steps_sampled: 70000
    num_steps_trained: 70000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,70,1589.89,70000,-5.0515,-2.51,-9.85,505.15


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 71000
  custom_metrics: {}
  date: 2021-10-23_20-26-31
  done: false
  episode_len_mean: 511.02
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.110199999999936
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 147
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 54604.110226864235
          cur_lr: 5.000000000000001e-05
          entropy: 0.2572443122458127
          entropy_coeff: 0.009999999999999998
          kl: 0.07464267377379454
          policy_loss: 0.12423695458306207
          total_loss: 4079.820797390408
          vf_explained_var: -0.32689276337623596
          vf_loss: 3.9024589710765416
    num_agent_steps_sampled: 71000
    num_agent_steps_trained: 71000
    num_steps_sampled: 71000
    num_steps_trained: 71000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,71,1602.73,71000,-5.1102,-2.51,-10,511.02


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 72000
  custom_metrics: {}
  date: 2021-10-23_20-26-43
  done: false
  episode_len_mean: 516.75
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.167499999999934
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 148
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 81906.16534029637
          cur_lr: 5.000000000000001e-05
          entropy: 0.16910366778158478
          entropy_coeff: 0.009999999999999998
          kl: 0.009302416857745912
          policy_loss: -0.03147548288106918
          total_loss: 765.8865683661567
          vf_explained_var: -0.4273495078086853
          vf_loss: 3.9944635497199164
    num_agent_steps_sampled: 72000
    num_agent_steps_trained: 72000
    num_steps_sampled: 72000
    num_steps_trained: 72000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,72,1615.04,72000,-5.1675,-2.51,-10,516.75


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 73000
  custom_metrics: {}
  date: 2021-10-23_20-26-55
  done: false
  episode_len_mean: 522.6
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.225999999999933
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 149
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 81906.16534029637
          cur_lr: 5.000000000000001e-05
          entropy: 0.6177735359304481
          entropy_coeff: 0.009999999999999998
          kl: 0.3422952171725531
          policy_loss: 0.13740672833389705
          total_loss: 28043.23829820421
          vf_explained_var: -0.3168020248413086
          vf_loss: 7.018691268894408
    num_agent_steps_sampled: 73000
    num_agent_steps_trained: 73000
    num_steps_sampled: 73000
    num_steps_trained: 73000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,73,1627.25,73000,-5.226,-2.51,-10,522.6




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 74000
  custom_metrics: {}
  date: 2021-10-23_20-27-26
  done: false
  episode_len_mean: 527.17
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.271699999999932
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 150
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 122859.24801044456
          cur_lr: 5.000000000000001e-05
          entropy: 0.4785889857345157
          entropy_coeff: 0.009999999999999998
          kl: 0.15381325430547196
          policy_loss: -0.13191560440593295
          total_loss: 18899.44028998481
          vf_explained_var: 0.37714022397994995
          vf_loss: 2.1963012155559327
    num_agent_steps_sampled: 74000
    num_agent_steps_trained: 74000
    num_steps_sampled: 74000
    num_steps_trained: 74000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,74,1658.42,74000,-5.2717,-2.51,-10,527.17


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 75000
  custom_metrics: {}
  date: 2021-10-23_20-27-40
  done: false
  episode_len_mean: 537.47
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.374699999999929
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 152
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 184288.8720156669
          cur_lr: 5.000000000000001e-05
          entropy: 0.5776816652880774
          entropy_coeff: 0.009999999999999998
          kl: 0.05212999378434486
          policy_loss: 0.09521262331141365
          total_loss: 9607.808039008247
          vf_explained_var: 0.2816626727581024
          vf_loss: 0.7406692531373765
    num_agent_steps_sampled: 75000
    num_agent_steps_trained: 75000
    num_steps_sampled: 75000
    num_steps_trained: 75000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,75,1671.81,75000,-5.3747,-2.51,-10,537.47


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 76000
  custom_metrics: {}
  date: 2021-10-23_20-27-53
  done: false
  episode_len_mean: 543.21
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.432099999999927
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 153
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 276433.30802350026
          cur_lr: 5.000000000000001e-05
          entropy: 0.32999124543534386
          entropy_coeff: 0.009999999999999998
          kl: 0.04384110634111696
          policy_loss: 0.06847681063744757
          total_loss: 12121.282535807291
          vf_explained_var: 0.13175025582313538
          vf_loss: 2.075164994928572
    num_agent_steps_sampled: 76000
    num_agent_steps_trained: 76000
    num_steps_sampled: 76000
    num_steps_trained: 76000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,76,1684.47,76000,-5.4321,-2.51,-10,543.21


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 77000
  custom_metrics: {}
  date: 2021-10-23_20-28-06
  done: false
  episode_len_mean: 548.42
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.484199999999927
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 154
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 414649.9620352505
          cur_lr: 5.000000000000001e-05
          entropy: 0.6995391090710957
          entropy_coeff: 0.009999999999999998
          kl: 0.12163319607368774
          policy_loss: 0.10275646766854657
          total_loss: 50435.985571289064
          vf_explained_var: -1.0
          vf_loss: 0.6885643182529344
    num_agent_steps_sampled: 77000
    num_agent_steps_trained: 77000
    num_steps_sampled: 77000
    num_steps_trained: 77000
  iterations_since_restore: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,77,1697.72,77000,-5.4842,-2.51,-10,548.42


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 78000
  custom_metrics: {}
  date: 2021-10-23_20-28-19
  done: false
  episode_len_mean: 553.63
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.536299999999926
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 155
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 621974.9430528757
          cur_lr: 5.000000000000001e-05
          entropy: 0.6682863540119595
          entropy_coeff: 0.009999999999999998
          kl: 0.0071221368503756825
          policy_loss: 0.20509748574760225
          total_loss: 4430.376493326823
          vf_explained_var: -0.8148199319839478
          vf_loss: 0.38750152968698076
    num_agent_steps_sampled: 78000
    num_agent_steps_trained: 78000
    num_steps_sampled: 78000
    num_steps_trained: 78000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,78,1710.69,78000,-5.5363,-2.51,-10,553.63


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 79000
  custom_metrics: {}
  date: 2021-10-23_20-28-31
  done: false
  episode_len_mean: 558.94
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.589399999999927
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 156
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 621974.9430528757
          cur_lr: 5.000000000000001e-05
          entropy: 0.12885757837858464
          entropy_coeff: 0.009999999999999998
          kl: 0.03663859085386826
          policy_loss: 0.07422341472572751
          total_loss: 22792.396392144095
          vf_explained_var: -1.0
          vf_loss: 4.036859317620595
    num_agent_steps_sampled: 79000
    num_agent_steps_trained: 79000
    num_steps_sampled: 79000
    num_steps_trained: 79000
  iterations_since_restore: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,79,1723.06,79000,-5.5894,-2.51,-10,558.94


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 80000
  custom_metrics: {}
  date: 2021-10-23_20-28-43
  done: false
  episode_len_mean: 564.32
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.643199999999925
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 157
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 932962.4145793135
          cur_lr: 5.000000000000001e-05
          entropy: 0.09703657767838902
          entropy_coeff: 0.009999999999999998
          kl: 0.003256697860908591
          policy_loss: 0.13702492126160198
          total_loss: 3038.7676228841146
          vf_explained_var: -1.0
          vf_loss: 0.2547561279601521
    num_agent_steps_sampled: 80000
    num_agent_steps_trained: 80000
    num_steps_sampled: 80000
    num_steps_trained: 80000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,80,1735.29,80000,-5.6432,-2.51,-10,564.32


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 81000
  custom_metrics: {}
  date: 2021-10-23_20-28-56
  done: false
  episode_len_mean: 570.05
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.7004999999999235
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 158
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 466481.20728965674
          cur_lr: 5.000000000000001e-05
          entropy: 0.10054474886920717
          entropy_coeff: 0.009999999999999998
          kl: 0.008171201391249067
          policy_loss: 0.09618532649344869
          total_loss: 3812.465846082899
          vf_explained_var: 0.19614559412002563
          vf_loss: 0.6586220797565249
    num_agent_steps_sampled: 81000
    num_agent_steps_trained: 81000
    num_steps_sampled: 81000
    num_steps_trained: 81000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,81,1747.58,81000,-5.7005,-2.51,-10,570.05


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 82000
  custom_metrics: {}
  date: 2021-10-23_20-29-08
  done: false
  episode_len_mean: 574.89
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.7488999999999235
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 159
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 466481.20728965674
          cur_lr: 5.000000000000001e-05
          entropy: 0.07788742557168007
          entropy_coeff: 0.009999999999999998
          kl: 0.0005936935469637521
          policy_loss: 0.13634318328566022
          total_loss: 277.9300923665365
          vf_explained_var: -1.0
          vf_loss: 0.8476398395167457
    num_agent_steps_sampled: 82000
    num_agent_steps_trained: 82000
    num_steps_sampled: 82000
    num_steps_trained: 82000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,82,1760.01,82000,-5.7489,-2.51,-10,574.89


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 83000
  custom_metrics: {}
  date: 2021-10-23_20-29-21
  done: false
  episode_len_mean: 581.12
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.811199999999923
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 160
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 233240.60364482837
          cur_lr: 5.000000000000001e-05
          entropy: 0.09406209103763104
          entropy_coeff: 0.009999999999999998
          kl: 0.006740021995372243
          policy_loss: 0.06990471225645807
          total_loss: 1573.453722466363
          vf_explained_var: -1.0
          vf_loss: 1.3379209670755599
    num_agent_steps_sampled: 83000
    num_agent_steps_trained: 83000
    num_steps_sampled: 83000
    num_steps_trained: 83000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,83,1772.52,83000,-5.8112,-2.51,-10,581.12


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 84000
  custom_metrics: {}
  date: 2021-10-23_20-29-33
  done: false
  episode_len_mean: 586.76
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.86759999999992
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 161
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 233240.60364482837
          cur_lr: 5.000000000000001e-05
          entropy: 0.05046704527404573
          entropy_coeff: 0.009999999999999998
          kl: 0.0013675021415110677
          policy_loss: 0.056054444652464654
          total_loss: 321.1782913208008
          vf_explained_var: -1.0
          vf_loss: 2.1657121466265785
    num_agent_steps_sampled: 84000
    num_agent_steps_trained: 84000
    num_steps_sampled: 84000
    num_steps_trained: 84000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,84,1784.98,84000,-5.8676,-2.51,-10,586.76


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 85000
  custom_metrics: {}
  date: 2021-10-23_20-29-46
  done: false
  episode_len_mean: 591.87
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.91869999999992
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 162
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 116620.30182241419
          cur_lr: 5.000000000000001e-05
          entropy: 0.04427581450177564
          entropy_coeff: 0.009999999999999998
          kl: 0.0002226138997583702
          policy_loss: 0.06070383389790853
          total_loss: 26.718650086720785
          vf_explained_var: -1.0
          vf_loss: 0.6970883392625384
    num_agent_steps_sampled: 85000
    num_agent_steps_trained: 85000
    num_steps_sampled: 85000
    num_steps_trained: 85000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,85,1797.27,85000,-5.9187,-2.51,-10,591.87


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 86000
  custom_metrics: {}
  date: 2021-10-23_20-29-58
  done: false
  episode_len_mean: 597.0
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -5.969999999999918
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 163
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 58310.15091120709
          cur_lr: 5.000000000000001e-05
          entropy: 0.07666339178880056
          entropy_coeff: 0.009999999999999998
          kl: 0.004592239820501871
          policy_loss: 0.12635241287449997
          total_loss: 268.55090857611765
          vf_explained_var: -1.0
          vf_loss: 0.6511189957459768
    num_agent_steps_sampled: 86000
    num_agent_steps_trained: 86000
    num_steps_sampled: 86000
    num_steps_trained: 86000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,86,1809.71,86000,-5.97,-2.51,-10,597


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 87000
  custom_metrics: {}
  date: 2021-10-23_20-30-11
  done: false
  episode_len_mean: 602.28
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.022799999999917
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 164
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 29155.075455603546
          cur_lr: 5.000000000000001e-05
          entropy: 0.07685713925295407
          entropy_coeff: 0.009999999999999998
          kl: 0.0008604613203917527
          policy_loss: 0.15658952173673443
          total_loss: 25.471039221021865
          vf_explained_var: -1.0
          vf_loss: 0.22840311676263808
    num_agent_steps_sampled: 87000
    num_agent_steps_trained: 87000
    num_steps_sampled: 87000
    num_steps_trained: 87000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,87,1822.22,87000,-6.0228,-2.51,-10,602.28


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 88000
  custom_metrics: {}
  date: 2021-10-23_20-30-23
  done: false
  episode_len_mean: 607.83
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.078299999999914
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 165
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 14577.537727801773
          cur_lr: 5.000000000000001e-05
          entropy: 0.1154529654317432
          entropy_coeff: 0.009999999999999998
          kl: 0.0031831382249947636
          policy_loss: 0.11098183778425058
          total_loss: 46.75693774753147
          vf_explained_var: -1.0
          vf_loss: 0.24479189217090608
    num_agent_steps_sampled: 88000
    num_agent_steps_trained: 88000
    num_steps_sampled: 88000
    num_steps_trained: 88000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,88,1834.7,88000,-6.0783,-2.51,-10,607.83


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 89000
  custom_metrics: {}
  date: 2021-10-23_20-30-37
  done: false
  episode_len_mean: 613.3
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.132999999999914
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 166
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7288.768863900887
          cur_lr: 5.000000000000001e-05
          entropy: 0.07122534960508346
          entropy_coeff: 0.009999999999999998
          kl: 0.0017246816320241325
          policy_loss: 0.08891722410917283
          total_loss: 12.840213595496284
          vf_explained_var: -1.0
          vf_loss: 0.1812025331788593
    num_agent_steps_sampled: 89000
    num_agent_steps_trained: 89000
    num_steps_sampled: 89000
    num_steps_trained: 89000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,89,1848.39,89000,-6.133,-2.51,-10,613.3


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 90000
  custom_metrics: {}
  date: 2021-10-23_20-30-50
  done: false
  episode_len_mean: 618.91
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.189099999999912
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 167
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3644.3844319504433
          cur_lr: 5.000000000000001e-05
          entropy: 0.07119910410708852
          entropy_coeff: 0.009999999999999998
          kl: 0.0006680248971355872
          policy_loss: 0.12725276036394967
          total_loss: 2.730872384707133
          vf_explained_var: -1.0
          vf_loss: 0.1697920127875275
    num_agent_steps_sampled: 90000
    num_agent_steps_trained: 90000
    num_steps_sampled: 90000
    num_steps_trained: 90000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,90,1861.23,90000,-6.1891,-2.51,-10,618.91


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 91000
  custom_metrics: {}
  date: 2021-10-23_20-31-02
  done: false
  episode_len_mean: 624.32
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.243199999999913
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 168
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1822.1922159752216
          cur_lr: 5.000000000000001e-05
          entropy: 0.1021169814798567
          entropy_coeff: 0.009999999999999998
          kl: 0.0026677686320100394
          policy_loss: 0.1170072864741087
          total_loss: 5.08638010289934
          vf_explained_var: -0.9999510645866394
          vf_loss: 0.10920660545428594
    num_agent_steps_sampled: 91000
    num_agent_steps_trained: 91000
    num_steps_sampled: 91000
    num_steps_trained: 91000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,91,1874.06,91000,-6.2432,-2.51,-10,624.32


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 92000
  custom_metrics: {}
  date: 2021-10-23_20-31-16
  done: false
  episode_len_mean: 629.99
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.299899999999911
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 169
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 911.0961079876108
          cur_lr: 5.000000000000001e-05
          entropy: 0.1324378326535225
          entropy_coeff: 0.009999999999999998
          kl: 0.01748409274344643
          policy_loss: 0.10821502844078673
          total_loss: 16.23313359949324
          vf_explained_var: -0.947478711605072
          vf_loss: 0.19655365637607045
    num_agent_steps_sampled: 92000
    num_agent_steps_trained: 92000
    num_steps_sampled: 92000
    num_steps_trained: 92000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,92,1887.54,92000,-6.2999,-2.51,-10,629.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 93000
  custom_metrics: {}
  date: 2021-10-23_20-31-27
  done: false
  episode_len_mean: 635.0
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.34999999999991
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 170
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 911.0961079876108
          cur_lr: 5.000000000000001e-05
          entropy: 0.0746322068075339
          entropy_coeff: 0.009999999999999998
          kl: 0.0036028097331937815
          policy_loss: 0.06873659615715345
          total_loss: 3.533589159117805
          vf_explained_var: -1.0
          vf_loss: 0.18309285293022792
    num_agent_steps_sampled: 93000
    num_agent_steps_trained: 93000
    num_steps_sampled: 93000
    num_steps_trained: 93000
  iterations_since_restore: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,93,1898.99,93000,-6.35,-2.51,-10,635


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 94000
  custom_metrics: {}
  date: 2021-10-23_20-31-40
  done: false
  episode_len_mean: 640.58
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.40579999999991
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 171
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 455.5480539938054
          cur_lr: 5.000000000000001e-05
          entropy: 0.08630121325453123
          entropy_coeff: 0.009999999999999998
          kl: 0.0056796449417662286
          policy_loss: 0.076897448549668
          total_loss: 2.8354079524676004
          vf_explained_var: -1.0
          vf_loss: 0.1720222727292114
    num_agent_steps_sampled: 94000
    num_agent_steps_trained: 94000
    num_steps_sampled: 94000
    num_steps_trained: 94000
  iterations_since_restore: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,94,1911.96,94000,-6.4058,-2.51,-10,640.58


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 95000
  custom_metrics: {}
  date: 2021-10-23_20-31-53
  done: false
  episode_len_mean: 645.51
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.4550999999999075
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 172
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 455.5480539938054
          cur_lr: 5.000000000000001e-05
          entropy: 0.07032793396049075
          entropy_coeff: 0.009999999999999998
          kl: 0.0016045015266475577
          policy_loss: 0.11210504435002804
          total_loss: 0.9460248159037696
          vf_explained_var: -1.0
          vf_loss: 0.10369548772772154
    num_agent_steps_sampled: 95000
    num_agent_steps_trained: 95000
    num_steps_sampled: 95000
    num_steps_trained: 95000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,95,1924.41,95000,-6.4551,-2.51,-10,645.51


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 96000
  custom_metrics: {}
  date: 2021-10-23_20-32-05
  done: false
  episode_len_mean: 650.46
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.504599999999908
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 173
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 227.7740269969027
          cur_lr: 5.000000000000001e-05
          entropy: 0.06935415375563833
          entropy_coeff: 0.009999999999999998
          kl: 0.0013207341253291816
          policy_loss: 0.09645633515384462
          total_loss: 0.4906192597415712
          vf_explained_var: -1.0
          vf_loss: 0.0940275263455179
    num_agent_steps_sampled: 96000
    num_agent_steps_trained: 96000
    num_steps_sampled: 96000
    num_steps_trained: 96000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,96,1936.68,96000,-6.5046,-2.51,-10,650.46


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 97000
  custom_metrics: {}
  date: 2021-10-23_20-32-18
  done: false
  episode_len_mean: 655.95
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.559499999999905
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 174
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 113.88701349845135
          cur_lr: 5.000000000000001e-05
          entropy: 0.0630393440524737
          entropy_coeff: 0.009999999999999998
          kl: 0.0008834283913731471
          policy_loss: 0.17036282875471645
          total_loss: 0.34221394260724386
          vf_explained_var: -1.0
          vf_loss: 0.07187048126426009
    num_agent_steps_sampled: 97000
    num_agent_steps_trained: 97000
    num_steps_sampled: 97000
    num_steps_trained: 97000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,97,1949.04,97000,-6.5595,-2.51,-10,655.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 98000
  custom_metrics: {}
  date: 2021-10-23_20-32-30
  done: false
  episode_len_mean: 661.39
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.613899999999904
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 175
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 56.943506749225676
          cur_lr: 5.000000000000001e-05
          entropy: 0.0733822324209743
          entropy_coeff: 0.009999999999999998
          kl: 0.0020512897909308473
          policy_loss: 0.1637149100502332
          total_loss: 0.36257002337111366
          vf_explained_var: -1.0
          vf_loss: 0.08278129779630237
    num_agent_steps_sampled: 98000
    num_agent_steps_trained: 98000
    num_steps_sampled: 98000
    num_steps_trained: 98000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,98,1961.37,98000,-6.6139,-2.51,-10,661.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 99000
  custom_metrics: {}
  date: 2021-10-23_20-32-43
  done: false
  episode_len_mean: 667.5
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.674999999999902
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 176
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 28.471753374612838
          cur_lr: 5.000000000000001e-05
          entropy: 0.06715935601128473
          entropy_coeff: 0.009999999999999998
          kl: 0.0017127223920801447
          policy_loss: 0.1427692969640096
          total_loss: 0.2884564891457558
          vf_explained_var: -1.0
          vf_loss: 0.09759457674291398
    num_agent_steps_sampled: 99000
    num_agent_steps_trained: 99000
    num_steps_sampled: 99000
    num_steps_trained: 99000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,99,1974.02,99000,-6.675,-2.51,-10,667.5


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 100000
  custom_metrics: {}
  date: 2021-10-23_20-32-55
  done: false
  episode_len_mean: 673.63
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.736299999999901
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 177
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 14.235876687306419
          cur_lr: 5.000000000000001e-05
          entropy: 0.0678138095471594
          entropy_coeff: 0.009999999999999998
          kl: 0.0013766959008838361
          policy_loss: 0.14412664241260953
          total_loss: 0.22420873724752002
          vf_explained_var: -1.0
          vf_loss: 0.061161760158009
    num_agent_steps_sampled: 100000
    num_agent_steps_trained: 100000
    num_steps_sampled: 100000
    num_steps_trained: 100000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,100,1986.37,100000,-6.7363,-2.51,-10,673.63


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 101000
  custom_metrics: {}
  date: 2021-10-23_20-33-07
  done: false
  episode_len_mean: 678.73
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.7872999999999
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 178
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.1179383436532095
          cur_lr: 5.000000000000001e-05
          entropy: 0.06891770685712496
          entropy_coeff: 0.009999999999999998
          kl: 0.001260933130591487
          policy_loss: 0.1449967886010806
          total_loss: 0.21564727938837475
          vf_explained_var: -1.0
          vf_loss: 0.06236441921856668
    num_agent_steps_sampled: 101000
    num_agent_steps_trained: 101000
    num_steps_sampled: 101000
    num_steps_trained: 101000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,101,1998.83,101000,-6.7873,-2.51,-10,678.73


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 102000
  custom_metrics: {}
  date: 2021-10-23_20-33-20
  done: false
  episode_len_mean: 684.84
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.8483999999999
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 179
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5589691718266048
          cur_lr: 5.000000000000001e-05
          entropy: 0.06802629811896219
          entropy_coeff: 0.009999999999999998
          kl: 0.0017090067073392372
          policy_loss: 0.1465590270029174
          total_loss: 0.22514434887303247
          vf_explained_var: -0.8328947424888611
          vf_loss: 0.0731832834581534
    num_agent_steps_sampled: 102000
    num_agent_steps_trained: 102000
    num_steps_sampled: 102000
    num_steps_trained: 102000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,102,2011.33,102000,-6.8484,-2.51,-10,684.84




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 103000
  custom_metrics: {}
  date: 2021-10-23_20-33-49
  done: false
  episode_len_mean: 688.62
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.8861999999998975
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 180
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7794845859133024
          cur_lr: 5.000000000000001e-05
          entropy: 0.11532993722293113
          entropy_coeff: 0.009999999999999998
          kl: 0.019549991961361632
          policy_loss: -0.017538480874564912
          total_loss: 0.1662178035825491
          vf_explained_var: -0.946964681148529
          vf_loss: 0.15012067192130618
    num_agent_steps_sampled: 103000
    num_agent_steps_trained: 103000
    num_steps_sampled: 103000
    num_steps_trained: 103000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,103,2040.75,103000,-6.8862,-2.51,-10,688.62


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 104000
  custom_metrics: {}
  date: 2021-10-23_20-34-03
  done: false
  episode_len_mean: 694.34
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.943399999999896
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 181
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7794845859133024
          cur_lr: 5.000000000000001e-05
          entropy: 0.13286382721530066
          entropy_coeff: 0.009999999999999998
          kl: 0.04400948922589628
          policy_loss: -0.024076961974302927
          total_loss: 0.21328761561049356
          vf_explained_var: -1.0
          vf_loss: 0.16037901188764309
    num_agent_steps_sampled: 104000
    num_agent_steps_trained: 104000
    num_steps_sampled: 104000
    num_steps_trained: 104000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,104,2053.76,104000,-6.9434,-2.51,-10,694.34


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 105000
  custom_metrics: {}
  date: 2021-10-23_20-34-14
  done: false
  episode_len_mean: 699.89
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -6.998899999999896
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 182
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6692268788699534
          cur_lr: 5.000000000000001e-05
          entropy: 0.11620198393033611
          entropy_coeff: 0.009999999999999998
          kl: 0.016416816031819003
          policy_loss: -0.00975104421377182
          total_loss: 0.12485515847802162
          vf_explained_var: -0.9418415427207947
          vf_loss: 0.09194801077246666
    num_agent_steps_sampled: 105000
    num_agent_steps_trained: 105000
    num_steps_sampled: 105000
    num_steps_trained: 105000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,105,2065.43,105000,-6.9989,-2.51,-10,699.89


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 106000
  custom_metrics: {}
  date: 2021-10-23_20-34-26
  done: false
  episode_len_mean: 706.2
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.061999999999895
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 183
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6692268788699534
          cur_lr: 5.000000000000001e-05
          entropy: 0.11910036760899756
          entropy_coeff: 0.009999999999999998
          kl: 0.020699499464697308
          policy_loss: 0.020856221848064
          total_loss: 0.17931074963675606
          vf_explained_var: -0.7346997857093811
          vf_loss: 0.10439386951426664
    num_agent_steps_sampled: 106000
    num_agent_steps_trained: 106000
    num_steps_sampled: 106000
    num_steps_trained: 106000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,106,2076.74,106000,-7.062,-2.51,-10,706.2


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 107000
  custom_metrics: {}
  date: 2021-10-23_20-34-37
  done: false
  episode_len_mean: 712.22
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.1221999999998955
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 184
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.00384031830493
          cur_lr: 5.000000000000001e-05
          entropy: 0.1361077221731345
          entropy_coeff: 0.009999999999999998
          kl: 0.02261968922490875
          policy_loss: 0.015961933301554786
          total_loss: 0.19855359747178025
          vf_explained_var: -0.757503092288971
          vf_loss: 0.09338711156613297
    num_agent_steps_sampled: 107000
    num_agent_steps_trained: 107000
    num_steps_sampled: 107000
    num_steps_trained: 107000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,107,2087.93,107000,-7.1222,-2.51,-10,712.22


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 108000
  custom_metrics: {}
  date: 2021-10-23_20-34-48
  done: false
  episode_len_mean: 718.39
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.183899999999894
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 185
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.005760477457395
          cur_lr: 5.000000000000001e-05
          entropy: 0.15460636988282203
          entropy_coeff: 0.009999999999999998
          kl: 0.028498666764547428
          policy_loss: -0.018695585429668427
          total_loss: 0.27066308856010435
          vf_explained_var: -0.7788168787956238
          vf_loss: 0.11974856985939873
    num_agent_steps_sampled: 108000
    num_agent_steps_trained: 108000
    num_steps_sampled: 108000
    num_steps_trained: 108000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,108,2099.25,108000,-7.1839,-2.51,-10,718.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 109000
  custom_metrics: {}
  date: 2021-10-23_20-34-59
  done: false
  episode_len_mean: 724.45
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.244499999999892
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 186
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.008640716186092
          cur_lr: 5.000000000000001e-05
          entropy: 0.1579559495051702
          entropy_coeff: 0.009999999999999998
          kl: 0.01854905943489737
          policy_loss: 0.025948545750644474
          total_loss: 0.3211120479636722
          vf_explained_var: -0.5699697136878967
          vf_loss: 0.1296412597513861
    num_agent_steps_sampled: 109000
    num_agent_steps_trained: 109000
    num_steps_sampled: 109000
    num_steps_trained: 109000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,109,2110.62,109000,-7.2445,-2.51,-10,724.45


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 110000
  custom_metrics: {}
  date: 2021-10-23_20-35-11
  done: false
  episode_len_mean: 730.2
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.301999999999891
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 187
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.008640716186092
          cur_lr: 5.000000000000001e-05
          entropy: 0.19200508089529145
          entropy_coeff: 0.009999999999999998
          kl: 0.03581796124991443
          policy_loss: -0.01761197352574931
          total_loss: 0.48252333568202127
          vf_explained_var: -0.7140599489212036
          vf_loss: 0.17938423450622293
    num_agent_steps_sampled: 110000
    num_agent_steps_trained: 110000
    num_steps_sampled: 110000
    num_steps_trained: 110000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,110,2121.9,110000,-7.302,-2.51,-10,730.2


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 111000
  custom_metrics: {}
  date: 2021-10-23_20-35-22
  done: false
  episode_len_mean: 736.46
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.364599999999889
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 188
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 13.512961074279138
          cur_lr: 5.000000000000001e-05
          entropy: 0.14119277248779932
          entropy_coeff: 0.009999999999999998
          kl: 0.02039985577058461
          policy_loss: 0.04914800806177987
          total_loss: 0.45747374130619894
          vf_explained_var: -0.565555214881897
          vf_loss: 0.13407518946462207
    num_agent_steps_sampled: 111000
    num_agent_steps_trained: 111000
    num_steps_sampled: 111000
    num_steps_trained: 111000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,111,2133.18,111000,-7.3646,-2.51,-10,736.46


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 112000
  custom_metrics: {}
  date: 2021-10-23_20-35-34
  done: false
  episode_len_mean: 742.4
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.4239999999998885
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 189
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 20.269441611418706
          cur_lr: 5.000000000000001e-05
          entropy: 0.16132946304149098
          entropy_coeff: 0.009999999999999998
          kl: 0.03865011145050327
          policy_loss: -0.031125560485654406
          total_loss: 0.895856051478121
          vf_explained_var: -0.9305583238601685
          vf_loss: 0.14517873231735495
    num_agent_steps_sampled: 112000
    num_agent_steps_trained: 112000
    num_steps_sampled: 112000
    num_steps_trained: 112000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,112,2145.14,112000,-7.424,-2.51,-10,742.4


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 113000
  custom_metrics: {}
  date: 2021-10-23_20-35-46
  done: false
  episode_len_mean: 748.56
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.485599999999886
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 190
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 30.404162417128074
          cur_lr: 5.000000000000001e-05
          entropy: 0.13331345087952085
          entropy_coeff: 0.009999999999999998
          kl: 0.012593080455230343
          policy_loss: 0.05757308850685756
          total_loss: 0.5372017817364798
          vf_explained_var: -0.5183005332946777
          vf_loss: 0.09807975565393766
    num_agent_steps_sampled: 113000
    num_agent_steps_trained: 113000
    num_steps_sampled: 113000
    num_steps_trained: 113000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,113,2157.25,113000,-7.4856,-2.51,-10,748.56


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 114000
  custom_metrics: {}
  date: 2021-10-23_20-35-58
  done: false
  episode_len_mean: 754.74
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.547399999999885
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 191
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 30.404162417128074
          cur_lr: 5.000000000000001e-05
          entropy: 0.15557588471306694
          entropy_coeff: 0.009999999999999998
          kl: 0.02738090270302362
          policy_loss: 0.015910717927747304
          total_loss: 1.007435593340132
          vf_explained_var: -0.7720438838005066
          vf_loss: 0.16058718802200422
    num_agent_steps_sampled: 114000
    num_agent_steps_trained: 114000
    num_steps_sampled: 114000
    num_steps_trained: 114000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,114,2169.46,114000,-7.5474,-2.51,-10,754.74


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 115000
  custom_metrics: {}
  date: 2021-10-23_20-36-10
  done: false
  episode_len_mean: 760.82
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.608199999999882
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 192
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 45.60624362569211
          cur_lr: 5.000000000000001e-05
          entropy: 0.1245954643521044
          entropy_coeff: 0.009999999999999998
          kl: 0.017556499048239655
          policy_loss: 0.009419541392061446
          total_loss: 0.9152136252986061
          vf_explained_var: -0.6785057783126831
          vf_loss: 0.10635407148963875
    num_agent_steps_sampled: 115000
    num_agent_steps_trained: 115000
    num_steps_sampled: 115000
    num_steps_trained: 115000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,115,2181.06,115000,-7.6082,-2.51,-10,760.82


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 116000
  custom_metrics: {}
  date: 2021-10-23_20-36-21
  done: false
  episode_len_mean: 766.89
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.668899999999881
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 193
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 45.60624362569211
          cur_lr: 5.000000000000001e-05
          entropy: 0.14886622784866227
          entropy_coeff: 0.009999999999999998
          kl: 0.033200472738179894
          policy_loss: 0.039442096485031976
          total_loss: 1.6124423570103115
          vf_explained_var: -0.5169067978858948
          vf_loss: 0.060340084549453527
    num_agent_steps_sampled: 116000
    num_agent_steps_trained: 116000
    num_steps_sampled: 116000
    num_steps_trained: 116000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,116,2192.28,116000,-7.6689,-2.51,-10,766.89


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 117000
  custom_metrics: {}
  date: 2021-10-23_20-36-31
  done: false
  episode_len_mean: 773.05
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.730499999999879
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 194
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 68.40936543853812
          cur_lr: 5.000000000000001e-05
          entropy: 0.12054353612992498
          entropy_coeff: 0.009999999999999998
          kl: 0.025132053914583393
          policy_loss: 0.017113872203561996
          total_loss: 1.796412605047226
          vf_explained_var: -0.6498730182647705
          vf_loss: 0.06123637393530872
    num_agent_steps_sampled: 117000
    num_agent_steps_trained: 117000
    num_steps_sampled: 117000
    num_steps_trained: 117000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,117,2202.34,117000,-7.7305,-2.51,-10,773.05


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 118000
  custom_metrics: {}
  date: 2021-10-23_20-36-43
  done: false
  episode_len_mean: 779.21
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.79209999999988
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 195
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 102.61404815780723
          cur_lr: 5.000000000000001e-05
          entropy: 0.1282740185658137
          entropy_coeff: 0.009999999999999998
          kl: 0.034011122413600485
          policy_loss: -0.019797805779510073
          total_loss: 3.5200272056791517
          vf_explained_var: -0.5778440833091736
          vf_loss: 0.05108867244174083
    num_agent_steps_sampled: 118000
    num_agent_steps_trained: 118000
    num_steps_sampled: 118000
    num_steps_trained: 118000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,118,2213.71,118000,-7.7921,-2.51,-10,779.21


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 119000
  custom_metrics: {}
  date: 2021-10-23_20-36-54
  done: false
  episode_len_mean: 785.27
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.852699999999878
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 196
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 153.92107223671087
          cur_lr: 5.000000000000001e-05
          entropy: 0.13875559460785652
          entropy_coeff: 0.009999999999999998
          kl: 0.030177730787545442
          policy_loss: 0.012320030646191703
          total_loss: 4.706238343980577
          vf_explained_var: -0.5509690046310425
          vf_loss: 0.050317346966928904
    num_agent_steps_sampled: 119000
    num_agent_steps_trained: 119000
    num_steps_sampled: 119000
    num_steps_trained: 119000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,119,2225.39,119000,-7.8527,-2.51,-10,785.27


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 120000
  custom_metrics: {}
  date: 2021-10-23_20-37-06
  done: false
  episode_len_mean: 791.61
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.916099999999876
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 197
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 230.88160835506625
          cur_lr: 5.000000000000001e-05
          entropy: 0.1254870639079147
          entropy_coeff: 0.009999999999999998
          kl: 0.027416011453088786
          policy_loss: -0.0050258262289894955
          total_loss: 6.365318115552267
          vf_explained_var: -0.5409349799156189
          vf_loss: 0.04174603598399295
    num_agent_steps_sampled: 120000
    num_agent_steps_trained: 120000
    num_steps_sampled: 120000
    num_steps_trained: 120000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,120,2237.33,120000,-7.9161,-2.51,-10,791.61


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 121000
  custom_metrics: {}
  date: 2021-10-23_20-37-19
  done: false
  episode_len_mean: 797.55
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -7.975499999999874
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 198
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 346.32241253259923
          cur_lr: 5.000000000000001e-05
          entropy: 0.12941349893808365
          entropy_coeff: 0.009999999999999998
          kl: 0.03201651006626586
          policy_loss: 0.013392839994695451
          total_loss: 11.141453410519494
          vf_explained_var: -0.49962061643600464
          vf_loss: 0.041319540256841314
    num_agent_steps_sampled: 121000
    num_agent_steps_trained: 121000
    num_steps_sampled: 121000
    num_steps_trained: 121000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,121,2249.6,121000,-7.9755,-2.51,-10,797.55


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 122000
  custom_metrics: {}
  date: 2021-10-23_20-37-31
  done: false
  episode_len_mean: 803.02
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.030199999999873
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 199
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 519.4836187988989
          cur_lr: 5.000000000000001e-05
          entropy: 0.1424684974882338
          entropy_coeff: 0.009999999999999998
          kl: 0.017953239660710097
          policy_loss: 0.019079404407077366
          total_loss: 9.395996491114298
          vf_explained_var: -0.4716002941131592
          vf_loss: 0.0519274210350381
    num_agent_steps_sampled: 122000
    num_agent_steps_trained: 122000
    num_steps_sampled: 122000
    num_steps_trained: 122000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,122,2261.75,122000,-8.0302,-2.51,-10,803.02


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 123000
  custom_metrics: {}
  date: 2021-10-23_20-37-43
  done: false
  episode_len_mean: 808.82
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.088199999999873
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 200
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 519.4836187988989
          cur_lr: 5.000000000000001e-05
          entropy: 0.09436790616148048
          entropy_coeff: 0.009999999999999998
          kl: 0.007959453364472008
          policy_loss: -0.03410199715031518
          total_loss: 4.138819269339243
          vf_explained_var: -0.5680614113807678
          vf_loss: 0.03905907126350535
    num_agent_steps_sampled: 123000
    num_agent_steps_trained: 123000
    num_steps_sampled: 123000
    num_steps_trained: 123000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,123,2273.46,123000,-8.0882,-2.51,-10,808.82


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 124000
  custom_metrics: {}
  date: 2021-10-23_20-37-54
  done: false
  episode_len_mean: 814.39
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.14389999999987
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 201
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 519.4836187988989
          cur_lr: 5.000000000000001e-05
          entropy: 0.11325760185718536
          entropy_coeff: 0.009999999999999998
          kl: 0.019696755508064396
          policy_loss: -0.015616286794344585
          total_loss: 10.267044268051784
          vf_explained_var: -0.4894258677959442
          vf_loss: 0.051650833980076846
    num_agent_steps_sampled: 124000
    num_agent_steps_trained: 124000
    num_steps_sampled: 124000
    num_steps_trained: 124000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,124,2285.11,124000,-8.1439,-2.51,-10,814.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 125000
  custom_metrics: {}
  date: 2021-10-23_20-38-06
  done: false
  episode_len_mean: 820.18
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.20179999999987
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 202
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 519.4836187988989
          cur_lr: 5.000000000000001e-05
          entropy: 0.12105568961964713
          entropy_coeff: 0.009999999999999998
          kl: 0.015225226721829839
          policy_loss: 0.0018374454643991258
          total_loss: 7.954795763227675
          vf_explained_var: -0.5211796164512634
          vf_loss: 0.044912699630690944
    num_agent_steps_sampled: 125000
    num_agent_steps_trained: 125000
    num_steps_sampled: 125000
    num_steps_trained: 125000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,125,2297.2,125000,-8.2018,-2.51,-10,820.18


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 126000
  custom_metrics: {}
  date: 2021-10-23_20-38-18
  done: false
  episode_len_mean: 825.83
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.25829999999987
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 203
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 519.4836187988989
          cur_lr: 5.000000000000001e-05
          entropy: 0.11431868589586681
          entropy_coeff: 0.009999999999999998
          kl: 0.02320100700534466
          policy_loss: -0.0013135340478685168
          total_loss: 12.083258051342435
          vf_explained_var: -0.47792577743530273
          vf_loss: 0.03317128283282121
    num_agent_steps_sampled: 126000
    num_agent_steps_trained: 126000
    num_steps_sampled: 126000
    num_steps_trained: 126000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,126,2308.99,126000,-8.2583,-2.51,-10,825.83


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 127000
  custom_metrics: {}
  date: 2021-10-23_20-38-30
  done: false
  episode_len_mean: 832.36
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.323599999999868
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 204
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 779.2254281983486
          cur_lr: 5.000000000000001e-05
          entropy: 0.12189994189474318
          entropy_coeff: 0.009999999999999998
          kl: 0.013617771584540606
          policy_loss: -0.05483993159400092
          total_loss: 10.585456720987956
          vf_explained_var: -0.5296938419342041
          vf_loss: 0.03020206071022484
    num_agent_steps_sampled: 127000
    num_agent_steps_trained: 127000
    num_steps_sampled: 127000
    num_steps_trained: 127000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,127,2320.75,127000,-8.3236,-2.51,-10,832.36


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 128000
  custom_metrics: {}
  date: 2021-10-23_20-38-42
  done: false
  episode_len_mean: 838.25
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.382499999999867
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 205
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 779.2254281983486
          cur_lr: 5.000000000000001e-05
          entropy: 0.09384631075792843
          entropy_coeff: 0.009999999999999998
          kl: 0.007972117226260404
          policy_loss: 0.019372827145788404
          total_loss: 6.267025559478336
          vf_explained_var: -0.5202066898345947
          vf_loss: 0.03651490323245525
    num_agent_steps_sampled: 128000
    num_agent_steps_trained: 128000
    num_steps_sampled: 128000
    num_steps_trained: 128000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,128,2333.08,128000,-8.3825,-2.51,-10,838.25


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 129000
  custom_metrics: {}
  date: 2021-10-23_20-38-55
  done: false
  episode_len_mean: 842.69
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.426899999999867
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 206
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 779.2254281983486
          cur_lr: 5.000000000000001e-05
          entropy: 0.06516613790558444
          entropy_coeff: 0.009999999999999998
          kl: 0.005201523357795344
          policy_loss: -0.0685784152812428
          total_loss: 4.013733479711744
          vf_explained_var: -0.5680638551712036
          vf_loss: 0.029804408840007252
    num_agent_steps_sampled: 129000
    num_agent_steps_trained: 129000
    num_steps_sampled: 129000
    num_steps_trained: 129000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,129,2345.15,129000,-8.4269,-2.51,-10,842.69


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 130000
  custom_metrics: {}
  date: 2021-10-23_20-39-06
  done: false
  episode_len_mean: 848.98
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.489799999999866
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 207
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 779.2254281983486
          cur_lr: 5.000000000000001e-05
          entropy: 0.0883300152917703
          entropy_coeff: 0.009999999999999998
          kl: 0.009017275590708272
          policy_loss: -0.026933994558122423
          total_loss: 7.029867168267568
          vf_explained_var: -0.49122482538223267
          vf_loss: 0.031194216571748257
    num_agent_steps_sampled: 130000
    num_agent_steps_trained: 130000
    num_steps_sampled: 130000
    num_steps_trained: 130000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,130,2356.97,130000,-8.4898,-2.51,-10,848.98


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 131000
  custom_metrics: {}
  date: 2021-10-23_20-39-18
  done: false
  episode_len_mean: 853.75
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.537499999999865
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 208
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 779.2254281983486
          cur_lr: 5.000000000000001e-05
          entropy: 0.0883910676671399
          entropy_coeff: 0.009999999999999998
          kl: 0.005098226697494586
          policy_loss: -0.016151915490627288
          total_loss: 3.9827341583040026
          vf_explained_var: -0.4927395284175873
          vf_loss: 0.027102233676446808
    num_agent_steps_sampled: 131000
    num_agent_steps_trained: 131000
    num_steps_sampled: 131000
    num_steps_trained: 131000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,131,2368.64,131000,-8.5375,-2.51,-10,853.75


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 132000
  custom_metrics: {}
  date: 2021-10-23_20-39-30
  done: false
  episode_len_mean: 860.34
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.603399999999862
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 209
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 779.2254281983486
          cur_lr: 5.000000000000001e-05
          entropy: 0.09022885022891892
          entropy_coeff: 0.009999999999999998
          kl: 0.006317518260200611
          policy_loss: -0.014452562895086077
          total_loss: 4.923083396752675
          vf_explained_var: -0.4518694579601288
          vf_loss: 0.015667632491224343
    num_agent_steps_sampled: 132000
    num_agent_steps_trained: 132000
    num_steps_sampled: 132000
    num_steps_trained: 132000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,132,2380.31,132000,-8.6034,-2.51,-10,860.34




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 133000
  custom_metrics: {}
  date: 2021-10-23_20-39-58
  done: false
  episode_len_mean: 865.3
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.65299999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 210
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 779.2254281983486
          cur_lr: 5.000000000000001e-05
          entropy: 0.08550900568564733
          entropy_coeff: 0.009999999999999998
          kl: 0.0026927066139049
          policy_loss: -0.06822200823161337
          total_loss: 2.0590156488948397
          vf_explained_var: -0.26923519372940063
          vf_loss: 0.029867356850041285
    num_agent_steps_sampled: 133000
    num_agent_steps_trained: 133000
    num_steps_sampled: 133000
    num_steps_trained: 133000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,133,2408.35,133000,-8.653,-2.51,-10,865.3


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 134000
  custom_metrics: {}
  date: 2021-10-23_20-40-11
  done: false
  episode_len_mean: 871.98
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.719799999999859
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 211
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 389.6127140991743
          cur_lr: 5.000000000000001e-05
          entropy: 0.08361860430902905
          entropy_coeff: 0.009999999999999998
          kl: 0.004637311750815974
          policy_loss: -0.022063236600822874
          total_loss: 1.8236408392588297
          vf_explained_var: -0.5644014477729797
          vf_loss: 0.039784711682134204
    num_agent_steps_sampled: 134000
    num_agent_steps_trained: 134000
    num_steps_sampled: 134000
    num_steps_trained: 134000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,134,2421.18,134000,-8.7198,-2.51,-10,871.98


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 135000
  custom_metrics: {}
  date: 2021-10-23_20-40-22
  done: false
  episode_len_mean: 877.67
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.776699999999858
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 212
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 194.80635704958715
          cur_lr: 5.000000000000001e-05
          entropy: 0.08954015117552545
          entropy_coeff: 0.009999999999999998
          kl: 0.005144493809590737
          policy_loss: -0.06750327431493336
          total_loss: 0.9555996093485091
          vf_explained_var: -0.6002189517021179
          vf_loss: 0.021818217645502753
    num_agent_steps_sampled: 135000
    num_agent_steps_trained: 135000
    num_steps_sampled: 135000
    num_steps_trained: 135000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,135,2432.65,135000,-8.7767,-2.51,-10,877.67


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 136000
  custom_metrics: {}
  date: 2021-10-23_20-40-34
  done: false
  episode_len_mean: 881.66
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.816599999999859
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 213
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 194.80635704958715
          cur_lr: 5.000000000000001e-05
          entropy: 0.09256212852067418
          entropy_coeff: 0.009999999999999998
          kl: 0.004032139200717211
          policy_loss: -0.03710687177048789
          total_loss: 0.7658514724837409
          vf_explained_var: -0.537631630897522
          vf_loss: 0.018397638734637036
    num_agent_steps_sampled: 136000
    num_agent_steps_trained: 136000
    num_steps_sampled: 136000
    num_steps_trained: 136000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,136,2444.55,136000,-8.8166,-2.51,-10,881.66


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 137000
  custom_metrics: {}
  date: 2021-10-23_20-40-46
  done: false
  episode_len_mean: 888.42
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.884199999999858
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 214
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 97.40317852479357
          cur_lr: 5.000000000000001e-05
          entropy: 0.08106917101475927
          entropy_coeff: 0.009999999999999998
          kl: 0.002023515174449939
          policy_loss: -0.042166253262095985
          total_loss: 0.1746053779290782
          vf_explained_var: -0.5683237910270691
          vf_loss: 0.0204855190590024
    num_agent_steps_sampled: 137000
    num_agent_steps_trained: 137000
    num_steps_sampled: 137000
    num_steps_trained: 137000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,137,2456.1,137000,-8.8842,-2.51,-10,888.42


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 138000
  custom_metrics: {}
  date: 2021-10-23_20-40-57
  done: false
  episode_len_mean: 895.17
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -8.951699999999853
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 215
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 48.70158926239679
          cur_lr: 5.000000000000001e-05
          entropy: 0.08112512305378913
          entropy_coeff: 0.009999999999999998
          kl: 0.0027964154788706866
          policy_loss: -0.04951946768495771
          total_loss: 0.10586632953749762
          vf_explained_var: -0.5796254277229309
          vf_loss: 0.020007174876001146
    num_agent_steps_sampled: 138000
    num_agent_steps_trained: 138000
    num_steps_sampled: 138000
    num_steps_trained: 138000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,138,2467.57,138000,-8.9517,-2.51,-10,895.17


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 139000
  custom_metrics: {}
  date: 2021-10-23_20-41-08
  done: false
  episode_len_mean: 900.99
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -9.009899999999853
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 216
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 24.350794631198394
          cur_lr: 5.000000000000001e-05
          entropy: 0.08683269520600637
          entropy_coeff: 0.009999999999999998
          kl: 0.003118364414614108
          policy_loss: -0.03634641758269734
          total_loss: 0.057684247526857586
          vf_explained_var: -0.5630317330360413
          vf_loss: 0.01896433863374922
    num_agent_steps_sampled: 139000
    num_agent_steps_trained: 139000
    num_steps_sampled: 139000
    num_steps_trained: 139000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,139,2478.87,139000,-9.0099,-2.51,-10,900.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 140000
  custom_metrics: {}
  date: 2021-10-23_20-41-20
  done: false
  episode_len_mean: 904.67
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -9.046699999999852
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 217
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 12.175397315599197
          cur_lr: 5.000000000000001e-05
          entropy: 0.08181147832009528
          entropy_coeff: 0.009999999999999998
          kl: 0.002437508989694632
          policy_loss: -0.047973385122087264
          total_loss: -1.1528531710306802e-06
          vf_explained_var: -0.6035236716270447
          vf_loss: 0.01911271106865671
    num_agent_steps_sampled: 140000
    num_agent_steps_trained: 140000
    num_steps_sampled: 140000
    num_steps_trained: 14000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,140,2490.26,140000,-9.0467,-2.51,-10,904.67


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 141000
  custom_metrics: {}
  date: 2021-10-23_20-41-31
  done: false
  episode_len_mean: 911.3
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -9.11299999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 218
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.087698657799598
          cur_lr: 5.000000000000001e-05
          entropy: 0.08578140454159842
          entropy_coeff: 0.009999999999999998
          kl: 0.0025710654289772114
          policy_loss: -0.049474950631459555
          total_loss: -0.0174320336845186
          vf_explained_var: -0.5961347818374634
          vf_loss: 0.017248862174650034
    num_agent_steps_sampled: 141000
    num_agent_steps_trained: 141000
    num_steps_sampled: 141000
    num_steps_trained: 141000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,141,2501.03,141000,-9.113,-2.51,-10,911.3


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 142000
  custom_metrics: {}
  date: 2021-10-23_20-41-42
  done: false
  episode_len_mean: 918.13
  episode_media: {}
  episode_reward_max: -2.5099999999999905
  episode_reward_mean: -9.181299999999851
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 219
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.043849328899799
          cur_lr: 5.000000000000001e-05
          entropy: 0.09142473439375559
          entropy_coeff: 0.009999999999999998
          kl: 0.0037347949834333526
          policy_loss: -0.051979705691337585
          total_loss: -0.02399014996157752
          vf_explained_var: -0.600180447101593
          vf_loss: 0.01753564198087487
    num_agent_steps_sampled: 142000
    num_agent_steps_trained: 142000
    num_steps_sampled: 142000
    num_steps_trained: 142000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,142,2512.26,142000,-9.1813,-2.51,-10,918.13


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 143000
  custom_metrics: {}
  date: 2021-10-23_20-41-55
  done: false
  episode_len_mean: 925.59
  episode_media: {}
  episode_reward_max: -2.8899999999999824
  episode_reward_mean: -9.255899999999848
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 220
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5219246644498996
          cur_lr: 5.000000000000001e-05
          entropy: 0.08684898879792956
          entropy_coeff: 0.009999999999999998
          kl: 0.0028876726877772145
          policy_loss: -0.06997233861022525
          total_loss: -0.05025305069155163
          vf_explained_var: -0.6348170638084412
          vf_loss: 0.01619295508393811
    num_agent_steps_sampled: 143000
    num_agent_steps_trained: 143000
    num_steps_sampled: 143000
    num_steps_trained: 143000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,143,2524.88,143000,-9.2559,-2.89,-10,925.59


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 144000
  custom_metrics: {}
  date: 2021-10-23_20-42-06
  done: false
  episode_len_mean: 932.62
  episode_media: {}
  episode_reward_max: -2.8899999999999824
  episode_reward_mean: -9.326199999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 221
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7609623322249498
          cur_lr: 5.000000000000001e-05
          entropy: 0.07877306896779272
          entropy_coeff: 0.009999999999999998
          kl: 0.002452896223662214
          policy_loss: -0.04598011424144109
          total_loss: -0.029213212927182517
          vf_explained_var: -0.6079399585723877
          vf_loss: 0.015688068895704215
    num_agent_steps_sampled: 144000
    num_agent_steps_trained: 144000
    num_steps_sampled: 144000
    num_steps_trained: 144000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,144,2536.31,144000,-9.3262,-2.89,-10,932.62


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 145000
  custom_metrics: {}
  date: 2021-10-23_20-42-17
  done: false
  episode_len_mean: 936.7
  episode_media: {}
  episode_reward_max: -2.8899999999999824
  episode_reward_mean: -9.366999999999846
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 222
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.3804811661124749
          cur_lr: 5.000000000000001e-05
          entropy: 0.08403321694996622
          entropy_coeff: 0.009999999999999998
          kl: 0.003893724277925988
          policy_loss: -0.05594661546250184
          total_loss: -0.035531571921375064
          vf_explained_var: -0.6134703159332275
          vf_loss: 0.019773885669807593
    num_agent_steps_sampled: 145000
    num_agent_steps_trained: 145000
    num_steps_sampled: 145000
    num_steps_trained: 145000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,145,2547.68,145000,-9.367,-2.89,-10,936.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 146000
  custom_metrics: {}
  date: 2021-10-23_20-42-29
  done: false
  episode_len_mean: 943.81
  episode_media: {}
  episode_reward_max: -3.1699999999999764
  episode_reward_mean: -9.438099999999844
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 223
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.19024058305623745
          cur_lr: 5.000000000000001e-05
          entropy: 0.0741129058930609
          entropy_coeff: 0.009999999999999998
          kl: 0.0032120637919029428
          policy_loss: -0.06116557617982229
          total_loss: -0.04445331560240851
          vf_explained_var: -0.6549215912818909
          vf_loss: 0.016842324804100726
    num_agent_steps_sampled: 146000
    num_agent_steps_trained: 146000
    num_steps_sampled: 146000
    num_steps_trained: 146000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,146,2559.3,146000,-9.4381,-3.17,-10,943.81


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 147000
  custom_metrics: {}
  date: 2021-10-23_20-42-40
  done: false
  episode_len_mean: 950.62
  episode_media: {}
  episode_reward_max: -3.1699999999999764
  episode_reward_mean: -9.506199999999843
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 224
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09512029152811873
          cur_lr: 5.000000000000001e-05
          entropy: 0.12205652710464265
          entropy_coeff: 0.009999999999999998
          kl: 0.011446677417390876
          policy_loss: -0.028771711223655277
          total_loss: -0.013011611335807376
          vf_explained_var: -0.6201749444007874
          vf_loss: 0.01589185151581963
    num_agent_steps_sampled: 147000
    num_agent_steps_trained: 147000
    num_steps_sampled: 147000
    num_steps_trained: 147000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,147,2570.68,147000,-9.5062,-3.17,-10,950.62


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 148000
  custom_metrics: {}
  date: 2021-10-23_20-42-52
  done: false
  episode_len_mean: 953.99
  episode_media: {}
  episode_reward_max: -3.1699999999999764
  episode_reward_mean: -9.539899999999843
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 225
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.09512029152811873
          cur_lr: 5.000000000000001e-05
          entropy: 0.0832965095837911
          entropy_coeff: 0.009999999999999998
          kl: 0.0034710622935866316
          policy_loss: -0.037640778720378874
          total_loss: -0.02214098076025645
          vf_explained_var: -0.6420302391052246
          vf_loss: 0.016002585935509867
    num_agent_steps_sampled: 148000
    num_agent_steps_trained: 148000
    num_steps_sampled: 148000
    num_steps_trained: 148000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,148,2581.95,148000,-9.5399,-3.17,-10,953.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 149000
  custom_metrics: {}
  date: 2021-10-23_20-43-03
  done: false
  episode_len_mean: 960.82
  episode_media: {}
  episode_reward_max: -5.7199999999999225
  episode_reward_mean: -9.608199999999842
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 226
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.04756014576405936
          cur_lr: 5.000000000000001e-05
          entropy: 0.08824380627936787
          entropy_coeff: 0.009999999999999998
          kl: 0.00328363844503959
          policy_loss: -0.023705439766248067
          total_loss: -0.007672048194540871
          vf_explained_var: -0.6506512761116028
          vf_loss: 0.016759664099663497
    num_agent_steps_sampled: 149000
    num_agent_steps_trained: 149000
    num_steps_sampled: 149000
    num_steps_trained: 149000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,149,2593.36,149000,-9.6082,-5.72,-10,960.82


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 150000
  custom_metrics: {}
  date: 2021-10-23_20-43-15
  done: false
  episode_len_mean: 962.82
  episode_media: {}
  episode_reward_max: -5.7199999999999225
  episode_reward_mean: -9.628199999999842
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 227
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.02378007288202968
          cur_lr: 5.000000000000001e-05
          entropy: 0.08323749287260904
          entropy_coeff: 0.009999999999999998
          kl: 0.0025517622125335038
          policy_loss: -0.04560595452785492
          total_loss: -0.030192094379001194
          vf_explained_var: -0.6409000158309937
          vf_loss: 0.016185555539818274
    num_agent_steps_sampled: 150000
    num_agent_steps_trained: 150000
    num_steps_sampled: 150000
    num_steps_trained: 15000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,150,2604.98,150000,-9.6282,-5.72,-10,962.82


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 151000
  custom_metrics: {}
  date: 2021-10-23_20-43-27
  done: false
  episode_len_mean: 964.95
  episode_media: {}
  episode_reward_max: -5.7199999999999225
  episode_reward_mean: -9.649499999999842
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 228
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01189003644101484
          cur_lr: 5.000000000000001e-05
          entropy: 0.08359611216518614
          entropy_coeff: 0.009999999999999998
          kl: 0.0029144645862591765
          policy_loss: -0.055715963410006626
          total_loss: -0.04242155543631977
          vf_explained_var: -0.6151829361915588
          vf_loss: 0.014095719225911631
    num_agent_steps_sampled: 151000
    num_agent_steps_trained: 151000
    num_steps_sampled: 151000
    num_steps_trained: 15100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,151,2616.95,151000,-9.6495,-5.72,-10,964.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 152000
  custom_metrics: {}
  date: 2021-10-23_20-43-39
  done: false
  episode_len_mean: 967.45
  episode_media: {}
  episode_reward_max: -5.7199999999999225
  episode_reward_mean: -9.67449999999984
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 229
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00594501822050742
          cur_lr: 5.000000000000001e-05
          entropy: 0.0844819876882765
          entropy_coeff: 0.009999999999999998
          kl: 0.002255602724229296
          policy_loss: -0.035028274522887336
          total_loss: -0.02271896004676819
          vf_explained_var: -0.6229991316795349
          vf_loss: 0.013140723807737231
    num_agent_steps_sampled: 152000
    num_agent_steps_trained: 152000
    num_steps_sampled: 152000
    num_steps_trained: 152000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,152,2629.1,152000,-9.6745,-5.72,-10,967.45


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 153000
  custom_metrics: {}
  date: 2021-10-23_20-43-51
  done: false
  episode_len_mean: 969.06
  episode_media: {}
  episode_reward_max: -5.7199999999999225
  episode_reward_mean: -9.69059999999984
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 230
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00297250911025371
          cur_lr: 5.000000000000001e-05
          entropy: 0.08333165836003091
          entropy_coeff: 0.009999999999999998
          kl: 0.0028357297220888236
          policy_loss: -0.05211018067267206
          total_loss: -0.03929539504978392
          vf_explained_var: -0.6031792759895325
          vf_loss: 0.013639680984326535
    num_agent_steps_sampled: 153000
    num_agent_steps_trained: 153000
    num_steps_sampled: 153000
    num_steps_trained: 153000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,153,2640.88,153000,-9.6906,-5.72,-10,969.06


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 154000
  custom_metrics: {}
  date: 2021-10-23_20-44-02
  done: false
  episode_len_mean: 973.34
  episode_media: {}
  episode_reward_max: -6.499999999999906
  episode_reward_mean: -9.733399999999838
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 231
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001486254555126855
          cur_lr: 5.000000000000001e-05
          entropy: 0.08131013289093972
          entropy_coeff: 0.009999999999999998
          kl: 0.002345103978748537
          policy_loss: -0.06208603613906437
          total_loss: -0.049262847834163244
          vf_explained_var: -0.6254019737243652
          vf_loss: 0.013632799860917859
    num_agent_steps_sampled: 154000
    num_agent_steps_trained: 154000
    num_steps_sampled: 154000
    num_steps_trained: 154000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,154,2652.32,154000,-9.7334,-6.5,-10,973.34


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 155000
  custom_metrics: {}
  date: 2021-10-23_20-44-14
  done: false
  episode_len_mean: 976.11
  episode_media: {}
  episode_reward_max: -6.499999999999906
  episode_reward_mean: -9.761099999999836
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 232
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007431272775634275
          cur_lr: 5.000000000000001e-05
          entropy: 0.08533177740044064
          entropy_coeff: 0.009999999999999998
          kl: 0.0026482584323578823
          policy_loss: -0.0648414123389456
          total_loss: -0.05323451616697841
          vf_explained_var: -0.6235926747322083
          vf_loss: 0.012458246061578393
    num_agent_steps_sampled: 155000
    num_agent_steps_trained: 155000
    num_steps_sampled: 155000
    num_steps_trained: 155000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,155,2663.87,155000,-9.7611,-6.5,-10,976.11


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 156000
  custom_metrics: {}
  date: 2021-10-23_20-44-25
  done: false
  episode_len_mean: 978.23
  episode_media: {}
  episode_reward_max: -6.499999999999906
  episode_reward_mean: -9.782299999999838
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 233
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00037156363878171377
          cur_lr: 5.000000000000001e-05
          entropy: 0.08162012538976139
          entropy_coeff: 0.009999999999999998
          kl: 0.002625092607922852
          policy_loss: -0.05163843648301231
          total_loss: -0.040041959451304544
          vf_explained_var: -0.6172472834587097
          vf_loss: 0.01241170667215354
    num_agent_steps_sampled: 156000
    num_agent_steps_trained: 156000
    num_steps_sampled: 156000
    num_steps_trained: 15600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,156,2675.32,156000,-9.7823,-6.5,-10,978.23


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 157000
  custom_metrics: {}
  date: 2021-10-23_20-44-37
  done: false
  episode_len_mean: 981.7
  episode_media: {}
  episode_reward_max: -7.22999999999989
  episode_reward_mean: -9.816999999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 234
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.1398132028679053
          entropy_coeff: 0.009999999999999998
          kl: 0.016554150425104632
          policy_loss: -0.04133647812737359
          total_loss: -0.02657315234343211
          vf_explained_var: -0.7362815737724304
          vf_loss: 0.016158380700896185
    num_agent_steps_sampled: 157000
    num_agent_steps_trained: 157000
    num_steps_sampled: 157000
    num_steps_trained: 157000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,157,2686.89,157000,-9.817,-7.23,-10,981.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 158000
  custom_metrics: {}
  date: 2021-10-23_20-44-48
  done: false
  episode_len_mean: 984.47
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.844699999999836
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 235
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.08035103289617432
          entropy_coeff: 0.009999999999999998
          kl: 0.006696139801190131
          policy_loss: -0.0075964971135059995
          total_loss: 0.006818202427691883
          vf_explained_var: -0.6540789008140564
          vf_loss: 0.015216968875999251
    num_agent_steps_sampled: 158000
    num_agent_steps_trained: 158000
    num_steps_sampled: 158000
    num_steps_trained: 158

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,158,2698.46,158000,-9.8447,-8.42,-10,984.47


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 159000
  custom_metrics: {}
  date: 2021-10-23_20-45-00
  done: false
  episode_len_mean: 985.65
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.856499999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 236
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.10720594036910269
          entropy_coeff: 0.009999999999999998
          kl: 0.006924714587835802
          policy_loss: -0.052041229398714174
          total_loss: -0.03822079710662365
          vf_explained_var: -0.6627369523048401
          vf_loss: 0.014891212650885185
    num_agent_steps_sampled: 159000
    num_agent_steps_trained: 159000
    num_steps_sampled: 159000
    num_steps_trained: 1590

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,159,2710.04,159000,-9.8565,-8.42,-10,985.65


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 160000
  custom_metrics: {}
  date: 2021-10-23_20-45-12
  done: false
  episode_len_mean: 986.52
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.865199999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 237
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00018578181939085688
          cur_lr: 5.000000000000001e-05
          entropy: 0.08194567710161209
          entropy_coeff: 0.009999999999999998
          kl: 0.002881427546445694
          policy_loss: -0.051943542311588924
          total_loss: -0.03781889072722859
          vf_explained_var: -0.6790842413902283
          vf_loss: 0.014943578844476077
    num_agent_steps_sampled: 160000
    num_agent_steps_trained: 160000
    num_steps_sampled: 160000
    num_steps_trained: 1600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,160,2721.49,160000,-9.8652,-8.42,-10,986.52


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 161000
  custom_metrics: {}
  date: 2021-10-23_20-45-23
  done: false
  episode_len_mean: 987.45
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.874499999999836
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 238
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.289090969542844e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.0934183453520139
          entropy_coeff: 0.009999999999999998
          kl: 0.002559048131418725
          policy_loss: -0.05008808424075444
          total_loss: -0.03815965569681591
          vf_explained_var: -0.6512003540992737
          vf_loss: 0.012862375777007804
    num_agent_steps_sampled: 161000
    num_agent_steps_trained: 161000
    num_steps_sampled: 161000
    num_steps_trained: 161000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,161,2733.03,161000,-9.8745,-8.42,-10,987.45


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 162000
  custom_metrics: {}
  date: 2021-10-23_20-45-35
  done: false
  episode_len_mean: 988.77
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.887699999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 239
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.644545484771422e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.09524472852547963
          entropy_coeff: 0.009999999999999998
          kl: 0.002296457634980066
          policy_loss: -0.029775946173402998
          total_loss: -0.01861203842692905
          vf_explained_var: -0.6457254886627197
          vf_loss: 0.012116251265009243
    num_agent_steps_sampled: 162000
    num_agent_steps_trained: 162000
    num_steps_sampled: 162000
    num_steps_trained: 16200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,162,2744.45,162000,-9.8877,-8.42,-10,988.77




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 163000
  custom_metrics: {}
  date: 2021-10-23_20-46-03
  done: false
  episode_len_mean: 988.62
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.886199999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 240
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.322272742385711e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.0889272718793816
          entropy_coeff: 0.009999999999999998
          kl: 0.005543759961922964
          policy_loss: -0.057913559095727075
          total_loss: -0.045985685951179925
          vf_explained_var: -0.6559314131736755
          vf_loss: 0.012817022106092837
    num_agent_steps_sampled: 163000
    num_agent_steps_trained: 163000
    num_steps_sampled: 163000
    num_steps_trained: 16300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,163,2772.67,163000,-9.8862,-8.42,-10,988.62


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 164000
  custom_metrics: {}
  date: 2021-10-23_20-46-15
  done: false
  episode_len_mean: 988.77
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.887699999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 241
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.322272742385711e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.08495621515644922
          entropy_coeff: 0.009999999999999998
          kl: 0.0029166550543676647
          policy_loss: -0.05957036655810144
          total_loss: -0.04853550973865721
          vf_explained_var: -0.649834156036377
          vf_loss: 0.011884363409545686
    num_agent_steps_sampled: 164000
    num_agent_steps_trained: 164000
    num_steps_sampled: 164000
    num_steps_trained: 164000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,164,2784.48,164000,-9.8877,-8.42,-10,988.77


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 165000
  custom_metrics: {}
  date: 2021-10-23_20-46-26
  done: false
  episode_len_mean: 989.09
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.890899999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 242
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1611363711928555e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.08692894072996246
          entropy_coeff: 0.009999999999999998
          kl: 0.004903948665984596
          policy_loss: -0.05967538207769394
          total_loss: -0.048869455854098
          vf_explained_var: -0.656048059463501
          vf_loss: 0.011675165632429223
    num_agent_steps_sampled: 165000
    num_agent_steps_trained: 165000
    num_steps_sampled: 165000
    num_steps_trained: 165000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,165,2796.05,165000,-9.8909,-8.42,-10,989.09


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 166000
  custom_metrics: {}
  date: 2021-10-23_20-46-37
  done: false
  episode_len_mean: 989.49
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.894899999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 243
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.805681855964278e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.09225805608762636
          entropy_coeff: 0.009999999999999998
          kl: 0.00506055785517674
          policy_loss: -0.06270230387647947
          total_loss: -0.05234794881608751
          vf_explained_var: -0.6499637365341187
          vf_loss: 0.011276911667341159
    num_agent_steps_sampled: 166000
    num_agent_steps_trained: 166000
    num_steps_sampled: 166000
    num_steps_trained: 166000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,166,2806.86,166000,-9.8949,-8.42,-10,989.49


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 167000
  custom_metrics: {}
  date: 2021-10-23_20-46-48
  done: false
  episode_len_mean: 989.89
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.898899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 244
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.805681855964278e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.08684565482868088
          entropy_coeff: 0.009999999999999998
          kl: 0.004999974406220847
          policy_loss: -0.04868189576599333
          total_loss: -0.037925521532694496
          vf_explained_var: -0.649591326713562
          vf_loss: 0.011624810961075127
    num_agent_steps_sampled: 167000
    num_agent_steps_trained: 167000
    num_steps_sampled: 167000
    num_steps_trained: 167000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,167,2818,167000,-9.8989,-8.42,-10,989.89


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 168000
  custom_metrics: {}
  date: 2021-10-23_20-47-00
  done: false
  episode_len_mean: 990.17
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.901699999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 245
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.902840927982139e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.08744769684142537
          entropy_coeff: 0.009999999999999998
          kl: 0.0031209362586701495
          policy_loss: -0.05198750330342187
          total_loss: -0.04184948661261135
          vf_explained_var: -0.6475628614425659
          vf_loss: 0.011012482938046258
    num_agent_steps_sampled: 168000
    num_agent_steps_trained: 168000
    num_steps_sampled: 168000
    num_steps_trained: 16800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,168,2829.29,168000,-9.9017,-8.42,-10,990.17


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 169000
  custom_metrics: {}
  date: 2021-10-23_20-47-11
  done: false
  episode_len_mean: 990.54
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.905399999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 246
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4514204639910694e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.08212863620784548
          entropy_coeff: 0.009999999999999998
          kl: 0.00220879924017936
          policy_loss: -0.04769863486289978
          total_loss: -0.03777583332525359
          vf_explained_var: -0.6499277949333191
          vf_loss: 0.010744087025523185
    num_agent_steps_sampled: 169000
    num_agent_steps_trained: 169000
    num_steps_sampled: 169000
    num_steps_trained: 169000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,169,2840.96,169000,-9.9054,-8.42,-10,990.54


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 170000
  custom_metrics: {}
  date: 2021-10-23_20-47-23
  done: false
  episode_len_mean: 990.5
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.904999999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 247
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.257102319955347e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.09183569103479386
          entropy_coeff: 0.009999999999999998
          kl: 0.007200811393300278
          policy_loss: -0.04968810155987739
          total_loss: -0.03987614069547918
          vf_explained_var: -0.6408587098121643
          vf_loss: 0.010730306097927193
    num_agent_steps_sampled: 170000
    num_agent_steps_trained: 170000
    num_steps_sampled: 170000
    num_steps_trained: 170000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,170,2852.21,170000,-9.905,-8.42,-10,990.5


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 171000
  custom_metrics: {}
  date: 2021-10-23_20-47-34
  done: false
  episode_len_mean: 990.46
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.904599999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 248
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.257102319955347e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.08982098011506928
          entropy_coeff: 0.009999999999999998
          kl: 0.010490630194544793
          policy_loss: -0.0433661214593384
          total_loss: -0.03263941618303458
          vf_explained_var: -0.6414042711257935
          vf_loss: 0.011624905586035714
    num_agent_steps_sampled: 171000
    num_agent_steps_trained: 171000
    num_steps_sampled: 171000
    num_steps_trained: 171000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,171,2863.43,171000,-9.9046,-8.42,-10,990.46


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 172000
  custom_metrics: {}
  date: 2021-10-23_20-47-45
  done: false
  episode_len_mean: 990.54
  episode_media: {}
  episode_reward_max: -8.419999999999865
  episode_reward_mean: -9.905399999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 249
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.257102319955347e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.0772451440907187
          entropy_coeff: 0.009999999999999998
          kl: 0.004200217017852184
          policy_loss: -0.05723725093735589
          total_loss: -0.04761817728479703
          vf_explained_var: -0.6107264757156372
          vf_loss: 0.010391519213509228
    num_agent_steps_sampled: 172000
    num_agent_steps_trained: 172000
    num_steps_sampled: 172000
    num_steps_trained: 172000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,172,2874.78,172000,-9.9054,-8.42,-10,990.54


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 173000
  custom_metrics: {}
  date: 2021-10-23_20-47-57
  done: false
  episode_len_mean: 992.12
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.921199999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 250
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6285511599776735e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.08730738452739185
          entropy_coeff: 0.009999999999999998
          kl: 0.005171750360427218
          policy_loss: -0.05421907243629297
          total_loss: -0.04388556029233667
          vf_explained_var: -0.6404196619987488
          vf_loss: 0.011206594425150089
    num_agent_steps_sampled: 173000
    num_agent_steps_trained: 173000
    num_steps_sampled: 173000
    num_steps_trained: 17300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,173,2886.24,173000,-9.9212,-8.58,-10,992.12


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 174000
  custom_metrics: {}
  date: 2021-10-23_20-48-08
  done: false
  episode_len_mean: 992.72
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.927199999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 251
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6285511599776735e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.1090716010166539
          entropy_coeff: 0.009999999999999998
          kl: 0.014193651263809039
          policy_loss: -0.039033909348977935
          total_loss: -0.029063924650351205
          vf_explained_var: -0.6435876488685608
          vf_loss: 0.011060698990089197
    num_agent_steps_sampled: 174000
    num_agent_steps_trained: 174000
    num_steps_sampled: 174000
    num_steps_trained: 1740

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,174,2897.51,174000,-9.9272,-8.58,-10,992.72


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 175000
  custom_metrics: {}
  date: 2021-10-23_20-48-20
  done: false
  episode_len_mean: 993.5
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.934999999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 252
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6285511599776735e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.1041573531097836
          entropy_coeff: 0.009999999999999998
          kl: 0.007649108506221738
          policy_loss: -0.05206903566916784
          total_loss: -0.04199811253282759
          vf_explained_var: -0.6513581275939941
          vf_loss: 0.011112494714972046
    num_agent_steps_sampled: 175000
    num_agent_steps_trained: 175000
    num_steps_sampled: 175000
    num_steps_trained: 175000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,175,2909.14,175000,-9.935,-8.58,-10,993.5


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 176000
  custom_metrics: {}
  date: 2021-10-23_20-48-31
  done: false
  episode_len_mean: 993.78
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.937799999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 253
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6285511599776735e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.09107942332824072
          entropy_coeff: 0.009999999999999998
          kl: 0.008308013593260612
          policy_loss: -0.06345900131596459
          total_loss: -0.05426154765817854
          vf_explained_var: -0.4491777718067169
          vf_loss: 0.01010824431432411
    num_agent_steps_sampled: 176000
    num_agent_steps_trained: 176000
    num_steps_sampled: 176000
    num_steps_trained: 176000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,176,2920.43,176000,-9.9378,-8.58,-10,993.78


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 177000
  custom_metrics: {}
  date: 2021-10-23_20-48-42
  done: false
  episode_len_mean: 994.14
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.941399999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 254
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6285511599776735e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.1014188688662317
          entropy_coeff: 0.009999999999999998
          kl: 0.008059198880154226
          policy_loss: -0.04084666404459211
          total_loss: -0.03051394298672676
          vf_explained_var: -0.4757714569568634
          vf_loss: 0.011346913403314021
    num_agent_steps_sampled: 177000
    num_agent_steps_trained: 177000
    num_steps_sampled: 177000
    num_steps_trained: 177000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,177,2931.78,177000,-9.9414,-8.58,-10,994.14


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 178000
  custom_metrics: {}
  date: 2021-10-23_20-48-54
  done: false
  episode_len_mean: 994.35
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943499999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 255
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6285511599776735e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.11204130260480774
          entropy_coeff: 0.009999999999999998
          kl: 0.010741152068496579
          policy_loss: -0.05481258730093638
          total_loss: -0.04408461186620924
          vf_explained_var: -0.6082149744033813
          vf_loss: 0.011848381853714172
    num_agent_steps_sampled: 178000
    num_agent_steps_trained: 178000
    num_steps_sampled: 178000
    num_steps_trained: 17800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,178,2943.19,178000,-9.9435,-8.58,-10,994.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 179000
  custom_metrics: {}
  date: 2021-10-23_20-49-06
  done: false
  episode_len_mean: 994.31
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943099999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 256
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6285511599776735e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.09863243831528558
          entropy_coeff: 0.009999999999999998
          kl: 0.014951546567802627
          policy_loss: -0.024798652074403234
          total_loss: -0.013754501524898741
          vf_explained_var: -0.4260792136192322
          vf_loss: 0.012030478678126302
    num_agent_steps_sampled: 179000
    num_agent_steps_trained: 179000
    num_steps_sampled: 179000
    num_steps_trained: 179

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,179,2955.01,179000,-9.9431,-8.58,-10,994.31


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 180000
  custom_metrics: {}
  date: 2021-10-23_20-49-17
  done: false
  episode_len_mean: 994.27
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.942699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 257
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6285511599776735e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.0877121345864402
          entropy_coeff: 0.009999999999999998
          kl: 0.007736854669120576
          policy_loss: -0.05117226102285915
          total_loss: -0.04114518832001421
          vf_explained_var: -0.5589351058006287
          vf_loss: 0.010904195947417369
    num_agent_steps_sampled: 180000
    num_agent_steps_trained: 180000
    num_steps_sampled: 180000
    num_steps_trained: 180000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,180,2966.93,180000,-9.9427,-8.58,-10,994.27


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 181000
  custom_metrics: {}
  date: 2021-10-23_20-49-29
  done: false
  episode_len_mean: 994.23
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.942299999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 258
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6285511599776735e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.08796774306231075
          entropy_coeff: 0.009999999999999998
          kl: 0.006977073682032319
          policy_loss: -0.07595323911971516
          total_loss: -0.06646260904769102
          vf_explained_var: -0.6472337245941162
          vf_loss: 0.010370296468803037
    num_agent_steps_sampled: 181000
    num_agent_steps_trained: 181000
    num_steps_sampled: 181000
    num_steps_trained: 18100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,181,2978.53,181000,-9.9423,-8.58,-10,994.23


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 182000
  custom_metrics: {}
  date: 2021-10-23_20-49-41
  done: false
  episode_len_mean: 994.23
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.94229999999983
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 259
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6285511599776735e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.086647570050425
          entropy_coeff: 0.009999999999999998
          kl: 0.006117838115379628
          policy_loss: -0.02671563650170962
          total_loss: -0.01668871185845799
          vf_explained_var: -0.6919413805007935
          vf_loss: 0.010893393549809439
    num_agent_steps_sampled: 182000
    num_agent_steps_trained: 182000
    num_steps_sampled: 182000
    num_steps_trained: 182000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,182,2990.05,182000,-9.9423,-8.58,-10,994.23


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 183000
  custom_metrics: {}
  date: 2021-10-23_20-49-53
  done: false
  episode_len_mean: 994.23
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.942299999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 260
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6285511599776735e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.0831356762184037
          entropy_coeff: 0.009999999999999998
          kl: 0.0038356481766742135
          policy_loss: -0.024080893562899695
          total_loss: -0.014857080909940932
          vf_explained_var: -0.6611854434013367
          vf_loss: 0.010055165824532095
    num_agent_steps_sampled: 183000
    num_agent_steps_trained: 183000
    num_steps_sampled: 183000
    num_steps_trained: 183

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,183,3002.22,183000,-9.9423,-8.58,-10,994.23


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 184000
  custom_metrics: {}
  date: 2021-10-23_20-50-05
  done: false
  episode_len_mean: 994.31
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943099999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 261
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8142755799888368e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.08763300370838907
          entropy_coeff: 0.009999999999999998
          kl: 0.006012666001010479
          policy_loss: -0.032046513507763545
          total_loss: -0.0227759401831362
          vf_explained_var: -0.6689106225967407
          vf_loss: 0.010146908253793501
    num_agent_steps_sampled: 184000
    num_agent_steps_trained: 184000
    num_steps_sampled: 184000
    num_steps_trained: 18400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,184,3013.9,184000,-9.9431,-8.58,-10,994.31


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 185000
  custom_metrics: {}
  date: 2021-10-23_20-50-16
  done: false
  episode_len_mean: 994.31
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943099999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 262
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8142755799888368e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.08882191603382429
          entropy_coeff: 0.009999999999999998
          kl: 0.007187112076078645
          policy_loss: -0.03047402219639884
          total_loss: -0.021055528024832407
          vf_explained_var: -0.6624611020088196
          vf_loss: 0.010306719534047363
    num_agent_steps_sampled: 185000
    num_agent_steps_trained: 185000
    num_steps_sampled: 185000
    num_steps_trained: 1850

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,185,3025.55,185000,-9.9431,-8.58,-10,994.31


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 186000
  custom_metrics: {}
  date: 2021-10-23_20-50-28
  done: false
  episode_len_mean: 994.35
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943499999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 263
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8142755799888368e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.08614945941501194
          entropy_coeff: 0.009999999999999998
          kl: 0.005890792515128851
          policy_loss: -0.0562579654985004
          total_loss: -0.046636484066645306
          vf_explained_var: -0.6789448857307434
          vf_loss: 0.010482976079866703
    num_agent_steps_sampled: 186000
    num_agent_steps_trained: 186000
    num_steps_sampled: 186000
    num_steps_trained: 18600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,186,3037.04,186000,-9.9435,-8.58,-10,994.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 187000
  custom_metrics: {}
  date: 2021-10-23_20-50-39
  done: false
  episode_len_mean: 994.39
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943899999999836
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 264
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8142755799888368e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.08178787223166889
          entropy_coeff: 0.009999999999999998
          kl: 0.004754746270676454
          policy_loss: -0.029799918995963202
          total_loss: -0.019935123291280533
          vf_explained_var: -0.6745011806488037
          vf_loss: 0.010682673077099025
    num_agent_steps_sampled: 187000
    num_agent_steps_trained: 187000
    num_steps_sampled: 187000
    num_steps_trained: 187

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,187,3048.7,187000,-9.9439,-8.58,-10,994.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 188000
  custom_metrics: {}
  date: 2021-10-23_20-50-51
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.944299999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 265
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.071377899944184e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.09205145397120051
          entropy_coeff: 0.009999999999999998
          kl: 0.013430522423651484
          policy_loss: -0.04852933420075311
          total_loss: -0.038594295498397616
          vf_explained_var: -0.6818767786026001
          vf_loss: 0.010855557360789842
    num_agent_steps_sampled: 188000
    num_agent_steps_trained: 188000
    num_steps_sampled: 188000
    num_steps_trained: 18800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,188,3060.12,188000,-9.9443,-8.58,-10,994.43


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 189000
  custom_metrics: {}
  date: 2021-10-23_20-51-02
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.944299999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 266
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.071377899944184e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.08476792855395211
          entropy_coeff: 0.009999999999999998
          kl: 0.004475979398315151
          policy_loss: -0.07103784630695979
          total_loss: -0.06137679119904836
          vf_explained_var: -0.6795949339866638
          vf_loss: 0.010508736533423265
    num_agent_steps_sampled: 189000
    num_agent_steps_trained: 189000
    num_steps_sampled: 189000
    num_steps_trained: 189000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,189,3071.63,189000,-9.9443,-8.58,-10,994.43


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 190000
  custom_metrics: {}
  date: 2021-10-23_20-51-14
  done: false
  episode_len_mean: 994.39
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 267
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.535688949972092e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.07152647107011742
          entropy_coeff: 0.009999999999999998
          kl: 0.002917911659460515
          policy_loss: -0.043268027239375645
          total_loss: -0.032780925681193666
          vf_explained_var: -0.6625582575798035
          vf_loss: 0.011202368144101154
    num_agent_steps_sampled: 190000
    num_agent_steps_trained: 190000
    num_steps_sampled: 190000
    num_steps_trained: 1900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,190,3083.24,190000,-9.9439,-8.58,-10,994.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 191000
  custom_metrics: {}
  date: 2021-10-23_20-51-26
  done: false
  episode_len_mean: 994.39
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 268
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.267844474986046e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.08970285869306988
          entropy_coeff: 0.009999999999999998
          kl: 0.005971954892285996
          policy_loss: -0.029086750580204858
          total_loss: -0.01958162122302585
          vf_explained_var: -0.6663177013397217
          vf_loss: 0.010402160932102965
    num_agent_steps_sampled: 191000
    num_agent_steps_trained: 191000
    num_steps_sampled: 191000
    num_steps_trained: 19100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,191,3094.82,191000,-9.9439,-8.58,-10,994.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 192000
  custom_metrics: {}
  date: 2021-10-23_20-51-37
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.944299999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 269
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.267844474986046e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.08867024133602779
          entropy_coeff: 0.009999999999999998
          kl: 0.005471881250074754
          policy_loss: -0.026465532680352528
          total_loss: -0.01723662813504537
          vf_explained_var: -0.6511636972427368
          vf_loss: 0.010115611994276859
    num_agent_steps_sampled: 192000
    num_agent_steps_trained: 192000
    num_steps_sampled: 192000
    num_steps_trained: 19200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,192,3106.43,192000,-9.9443,-8.58,-10,994.43




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 193000
  custom_metrics: {}
  date: 2021-10-23_20-52-07
  done: false
  episode_len_mean: 992.94
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.929399999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 270
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.267844474986046e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.10055597225824991
          entropy_coeff: 0.009999999999999998
          kl: 0.010657722783637129
          policy_loss: -0.02579617417520947
          total_loss: -0.017059090236822765
          vf_explained_var: -0.6304014325141907
          vf_loss: 0.009742649961521643
    num_agent_steps_sampled: 193000
    num_agent_steps_trained: 193000
    num_steps_sampled: 193000
    num_steps_trained: 19300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,193,3136.23,193000,-9.9294,-8.51,-10,992.94


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 194000
  custom_metrics: {}
  date: 2021-10-23_20-52-19
  done: false
  episode_len_mean: 992.94
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.929399999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 271
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.267844474986046e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.08607157948944304
          entropy_coeff: 0.009999999999999998
          kl: 0.002745303999270416
          policy_loss: -0.04643081095483568
          total_loss: -0.03737394048107995
          vf_explained_var: -0.62497878074646
          vf_loss: 0.009917586017400027
    num_agent_steps_sampled: 194000
    num_agent_steps_trained: 194000
    num_steps_sampled: 194000
    num_steps_trained: 194000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,194,3148.08,194000,-9.9294,-8.51,-10,992.94


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 195000
  custom_metrics: {}
  date: 2021-10-23_20-52-31
  done: false
  episode_len_mean: 992.94
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.929399999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 272
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.133922237493023e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.08832004268964132
          entropy_coeff: 0.009999999999999998
          kl: 0.0037661093481195468
          policy_loss: -0.05189051181077957
          total_loss: -0.04345064875152376
          vf_explained_var: -0.6359334588050842
          vf_loss: 0.009323065035924729
    num_agent_steps_sampled: 195000
    num_agent_steps_trained: 195000
    num_steps_sampled: 195000
    num_steps_trained: 19500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,195,3159.92,195000,-9.9294,-8.51,-10,992.94


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 196000
  custom_metrics: {}
  date: 2021-10-23_20-52-42
  done: false
  episode_len_mean: 992.94
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.929399999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 273
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.669611187465115e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.09008698686957359
          entropy_coeff: 0.009999999999999998
          kl: 0.006967998400796205
          policy_loss: -0.04393249750137329
          total_loss: -0.03566991024547153
          vf_explained_var: -0.6299407482147217
          vf_loss: 0.00916346350633022
    num_agent_steps_sampled: 196000
    num_agent_steps_trained: 196000
    num_steps_sampled: 196000
    num_steps_trained: 196000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,196,3171.38,196000,-9.9294,-8.51,-10,992.94


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 197000
  custom_metrics: {}
  date: 2021-10-23_20-52-54
  done: false
  episode_len_mean: 992.98
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.929799999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 274
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.669611187465115e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.13362250046597587
          entropy_coeff: 0.009999999999999998
          kl: 0.018040065012044377
          policy_loss: -0.01599472529358334
          total_loss: -0.005763988031281365
          vf_explained_var: -0.6250104308128357
          vf_loss: 0.011566962987288005
    num_agent_steps_sampled: 197000
    num_agent_steps_trained: 197000
    num_steps_sampled: 197000
    num_steps_trained: 19700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,197,3182.98,197000,-9.9298,-8.51,-10,992.98


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 198000
  custom_metrics: {}
  date: 2021-10-23_20-53-06
  done: false
  episode_len_mean: 992.99
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.929899999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 275
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.669611187465115e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.11072282617290814
          entropy_coeff: 0.009999999999999998
          kl: 0.009967137360945344
          policy_loss: -0.027888652351167467
          total_loss: -0.018756913642088573
          vf_explained_var: -0.6099923253059387
          vf_loss: 0.010238968693496038
    num_agent_steps_sampled: 198000
    num_agent_steps_trained: 198000
    num_steps_sampled: 198000
    num_steps_trained: 1980

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,198,3194.63,198000,-9.9299,-8.51,-10,992.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 199000
  custom_metrics: {}
  date: 2021-10-23_20-53-17
  done: false
  episode_len_mean: 992.91
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.929099999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 276
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.669611187465115e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.09944639172818925
          entropy_coeff: 0.009999999999999998
          kl: 0.005447967730772992
          policy_loss: -0.039976982110076484
          total_loss: -0.03159511701928245
          vf_explained_var: -0.6094845533370972
          vf_loss: 0.00937632845921649
    num_agent_steps_sampled: 199000
    num_agent_steps_trained: 199000
    num_steps_sampled: 199000
    num_steps_trained: 199000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,199,3206.3,199000,-9.9291,-8.51,-10,992.91


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 200000
  custom_metrics: {}
  date: 2021-10-23_20-53-29
  done: false
  episode_len_mean: 992.91
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.929099999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 277
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.669611187465115e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.1032461060418023
          entropy_coeff: 0.009999999999999998
          kl: 0.011513785774716073
          policy_loss: -0.05626225090689129
          total_loss: -0.04743962983290354
          vf_explained_var: -0.6175187826156616
          vf_loss: 0.00985508314980608
    num_agent_steps_sampled: 200000
    num_agent_steps_trained: 200000
    num_steps_sampled: 200000
    num_steps_trained: 200000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,200,3217.78,200000,-9.9291,-8.51,-10,992.91


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 201000
  custom_metrics: {}
  date: 2021-10-23_20-53-40
  done: false
  episode_len_mean: 992.95
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.929499999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 278
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.669611187465115e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.08814899267421829
          entropy_coeff: 0.009999999999999998
          kl: 0.005073354719206691
          policy_loss: -0.07942362162801954
          total_loss: -0.0710291369093789
          vf_explained_var: -0.6149818301200867
          vf_loss: 0.009275976767660015
    num_agent_steps_sampled: 201000
    num_agent_steps_trained: 201000
    num_steps_sampled: 201000
    num_steps_trained: 201000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,201,3229.28,201000,-9.9295,-8.51,-10,992.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 202000
  custom_metrics: {}
  date: 2021-10-23_20-53-52
  done: false
  episode_len_mean: 992.87
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.928699999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 279
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.669611187465115e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.10064183043109046
          entropy_coeff: 0.009999999999999998
          kl: 0.012380871889440136
          policy_loss: -0.0616703203982777
          total_loss: -0.05461834387646781
          vf_explained_var: -0.5639652013778687
          vf_loss: 0.008058394231678296
    num_agent_steps_sampled: 202000
    num_agent_steps_trained: 202000
    num_steps_sampled: 202000
    num_steps_trained: 202000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,202,3241.29,202000,-9.9287,-8.51,-10,992.87


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 203000
  custom_metrics: {}
  date: 2021-10-23_20-54-04
  done: false
  episode_len_mean: 994.29
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.942899999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 280
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.669611187465115e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.0859119572573238
          entropy_coeff: 0.009999999999999998
          kl: 0.004176186027729677
          policy_loss: -0.07194086644384597
          total_loss: -0.06387441390090519
          vf_explained_var: -0.6172655820846558
          vf_loss: 0.00892557974697815
    num_agent_steps_sampled: 203000
    num_agent_steps_trained: 203000
    num_steps_sampled: 203000
    num_steps_trained: 203000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,203,3252.7,203000,-9.9429,-8.51,-10,994.29


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 204000
  custom_metrics: {}
  date: 2021-10-23_20-54-15
  done: false
  episode_len_mean: 994.29
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.942899999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 281
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8348055937325574e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.08967976421117782
          entropy_coeff: 0.009999999999999998
          kl: 0.005815979337138641
          policy_loss: -0.06541906843582788
          total_loss: -0.05788554383648766
          vf_explained_var: -0.6012147068977356
          vf_loss: 0.008430330126753285
    num_agent_steps_sampled: 204000
    num_agent_steps_trained: 204000
    num_steps_sampled: 204000
    num_steps_trained: 20400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,204,3264.13,204000,-9.9429,-8.51,-10,994.29


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 205000
  custom_metrics: {}
  date: 2021-10-23_20-54-27
  done: false
  episode_len_mean: 994.26
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.942599999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 282
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8348055937325574e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.08975108580456839
          entropy_coeff: 0.009999999999999998
          kl: 0.006731364006797472
          policy_loss: -0.0468341824081209
          total_loss: -0.03956813712914785
          vf_explained_var: -0.5839703679084778
          vf_loss: 0.008163555620962546
    num_agent_steps_sampled: 205000
    num_agent_steps_trained: 205000
    num_steps_sampled: 205000
    num_steps_trained: 205000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,205,3275.54,205000,-9.9426,-8.51,-10,994.26


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 206000
  custom_metrics: {}
  date: 2021-10-23_20-54-38
  done: false
  episode_len_mean: 994.18
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.941799999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 283
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8348055937325574e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.09356237310502265
          entropy_coeff: 0.009999999999999998
          kl: 0.005271560950980832
          policy_loss: -0.07486477800541454
          total_loss: -0.06842466501726044
          vf_explained_var: -0.5739423036575317
          vf_loss: 0.007375736931701087
    num_agent_steps_sampled: 206000
    num_agent_steps_trained: 206000
    num_steps_sampled: 206000
    num_steps_trained: 20600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,206,3286.89,206000,-9.9418,-8.51,-10,994.18


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 207000
  custom_metrics: {}
  date: 2021-10-23_20-54-49
  done: false
  episode_len_mean: 994.14
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.941399999999831
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 284
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8348055937325574e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.10183829888701439
          entropy_coeff: 0.009999999999999998
          kl: 0.010156489295574526
          policy_loss: -0.058466691275437674
          total_loss: -0.051359934194220434
          vf_explained_var: -0.5689526796340942
          vf_loss: 0.008125146889748672
    num_agent_steps_sampled: 207000
    num_agent_steps_trained: 207000
    num_steps_sampled: 207000
    num_steps_trained: 207

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,207,3298.2,207000,-9.9414,-8.51,-10,994.14


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 208000
  custom_metrics: {}
  date: 2021-10-23_20-55-01
  done: false
  episode_len_mean: 994.1
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.940999999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 285
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8348055937325574e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.09429547232058313
          entropy_coeff: 0.009999999999999998
          kl: 0.004469352095232655
          policy_loss: -0.06851904491583506
          total_loss: -0.06068610433075163
          vf_explained_var: -0.6557067632675171
          vf_loss: 0.008775894167936511
    num_agent_steps_sampled: 208000
    num_agent_steps_trained: 208000
    num_steps_sampled: 208000
    num_steps_trained: 208000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,208,3309.57,208000,-9.941,-8.51,-10,994.1


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 209000
  custom_metrics: {}
  date: 2021-10-23_20-55-13
  done: false
  episode_len_mean: 994.02
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.940199999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 286
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4174027968662787e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.10220407115088569
          entropy_coeff: 0.009999999999999998
          kl: 0.01377461020472563
          policy_loss: -0.0515348623196284
          total_loss: -0.044597347577412925
          vf_explained_var: -0.5891354084014893
          vf_loss: 0.007959557715285984
    num_agent_steps_sampled: 209000
    num_agent_steps_trained: 209000
    num_steps_sampled: 209000
    num_steps_trained: 209000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,209,3322,209000,-9.9402,-8.51,-10,994.02


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 210000
  custom_metrics: {}
  date: 2021-10-23_20-55-25
  done: false
  episode_len_mean: 994.02
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.940199999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 287
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4174027968662787e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.09155034580164485
          entropy_coeff: 0.009999999999999998
          kl: 0.006622921975536479
          policy_loss: -0.04366960078477859
          total_loss: -0.03544338958130942
          vf_explained_var: -0.6366048455238342
          vf_loss: 0.009141714798493518
    num_agent_steps_sampled: 210000
    num_agent_steps_trained: 210000
    num_steps_sampled: 210000
    num_steps_trained: 21000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,210,3333.31,210000,-9.9402,-8.51,-10,994.02


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 211000
  custom_metrics: {}
  date: 2021-10-23_20-55-36
  done: false
  episode_len_mean: 994.02
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.940199999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 288
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4174027968662787e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.10495546691947513
          entropy_coeff: 0.009999999999999998
          kl: 0.011475045947978895
          policy_loss: -0.05710694028271569
          total_loss: -0.049950961437490254
          vf_explained_var: -0.6204105019569397
          vf_loss: 0.008205532574275922
    num_agent_steps_sampled: 211000
    num_agent_steps_trained: 211000
    num_steps_sampled: 211000
    num_steps_trained: 2110

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,211,3344.64,211000,-9.9402,-8.51,-10,994.02


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 212000
  custom_metrics: {}
  date: 2021-10-23_20-55-47
  done: false
  episode_len_mean: 993.94
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939399999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 289
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4174027968662787e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.08820668632785479
          entropy_coeff: 0.009999999999999998
          kl: 0.0050988801431635184
          policy_loss: -0.0428888072570165
          total_loss: -0.03571336236264971
          vf_explained_var: -0.6221462488174438
          vf_loss: 0.00805751350061554
    num_agent_steps_sampled: 212000
    num_agent_steps_trained: 212000
    num_steps_sampled: 212000
    num_steps_trained: 212000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,212,3356.2,212000,-9.9394,-8.51,-10,993.94


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 213000
  custom_metrics: {}
  date: 2021-10-23_20-55-59
  done: false
  episode_len_mean: 993.94
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939399999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 290
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4174027968662787e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.08258071823252572
          entropy_coeff: 0.009999999999999998
          kl: 0.0028976909059565516
          policy_loss: -0.032839290135436586
          total_loss: -0.02558927403555976
          vf_explained_var: -0.6301074624061584
          vf_loss: 0.00807582523363332
    num_agent_steps_sampled: 213000
    num_agent_steps_trained: 213000
    num_steps_sampled: 213000
    num_steps_trained: 2130

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,213,3367.5,213000,-9.9394,-8.51,-10,993.94


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 214000
  custom_metrics: {}
  date: 2021-10-23_20-56-10
  done: false
  episode_len_mean: 993.97
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 291
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.087013984331394e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.09177234263883696
          entropy_coeff: 0.009999999999999998
          kl: 0.0040097811854340965
          policy_loss: -0.03522063477171792
          total_loss: -0.02767683979537752
          vf_explained_var: -0.6370370388031006
          vf_loss: 0.00846152023360547
    num_agent_steps_sampled: 214000
    num_agent_steps_trained: 214000
    num_steps_sampled: 214000
    num_steps_trained: 214000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,214,3379.06,214000,-9.9397,-8.51,-10,993.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 215000
  custom_metrics: {}
  date: 2021-10-23_20-56-22
  done: false
  episode_len_mean: 993.97
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 292
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.543506992165697e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.08308562263846397
          entropy_coeff: 0.009999999999999998
          kl: 0.0038362282651683522
          policy_loss: -0.03109388202428818
          total_loss: -0.02420074956284629
          vf_explained_var: -0.6056704521179199
          vf_loss: 0.007723993127648201
    num_agent_steps_sampled: 215000
    num_agent_steps_trained: 215000
    num_steps_sampled: 215000
    num_steps_trained: 21500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,215,3390.39,215000,-9.9397,-8.51,-10,993.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 216000
  custom_metrics: {}
  date: 2021-10-23_20-56-33
  done: false
  episode_len_mean: 993.97
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 293
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7717534960828484e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.09772099951903025
          entropy_coeff: 0.009999999999999998
          kl: 0.006722585580104755
          policy_loss: -0.031441161036491395
          total_loss: -0.02419389088948568
          vf_explained_var: -0.6138069033622742
          vf_loss: 0.008224478281206555
    num_agent_steps_sampled: 216000
    num_agent_steps_trained: 216000
    num_steps_sampled: 216000
    num_steps_trained: 2160

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,216,3401.87,216000,-9.9397,-8.51,-10,993.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 217000
  custom_metrics: {}
  date: 2021-10-23_20-56-46
  done: false
  episode_len_mean: 993.97
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 294
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7717534960828484e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.08318656442893876
          entropy_coeff: 0.009999999999999998
          kl: 0.0019892817712388934
          policy_loss: -0.028042657176653545
          total_loss: -0.021077269812424977
          vf_explained_var: -0.5923990607261658
          vf_loss: 0.007797252485761419
    num_agent_steps_sampled: 217000
    num_agent_steps_trained: 217000
    num_steps_sampled: 217000
    num_steps_trained: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,217,3414.19,217000,-9.9397,-8.51,-10,993.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 218000
  custom_metrics: {}
  date: 2021-10-23_20-56-56
  done: false
  episode_len_mean: 994.05
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.940499999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 295
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.858767480414242e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.0894173757897483
          entropy_coeff: 0.009999999999999998
          kl: 0.006849402765510604
          policy_loss: -0.030525876581668852
          total_loss: -0.02311396532588535
          vf_explained_var: -0.6117503643035889
          vf_loss: 0.008306085345490525
    num_agent_steps_sampled: 218000
    num_agent_steps_trained: 218000
    num_steps_sampled: 218000
    num_steps_trained: 218000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,218,3424.28,218000,-9.9405,-8.51,-10,994.05


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 219000
  custom_metrics: {}
  date: 2021-10-23_20-57-07
  done: false
  episode_len_mean: 994.13
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.941299999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 296
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.858767480414242e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.0831284853319327
          entropy_coeff: 0.009999999999999998
          kl: 0.0035235784275250304
          policy_loss: -0.03708277783460087
          total_loss: -0.029958371735281413
          vf_explained_var: -0.6008654832839966
          vf_loss: 0.00795568938418809
    num_agent_steps_sampled: 219000
    num_agent_steps_trained: 219000
    num_steps_sampled: 219000
    num_steps_trained: 219000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,219,3435.17,219000,-9.9413,-8.51,-10,994.13


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 220000
  custom_metrics: {}
  date: 2021-10-23_20-57-18
  done: false
  episode_len_mean: 994.13
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.941299999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 297
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.429383740207121e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.08081076790889104
          entropy_coeff: 0.009999999999999998
          kl: 0.002026735931415007
          policy_loss: -0.02928614334927665
          total_loss: -0.022166063553757137
          vf_explained_var: -0.6389461159706116
          vf_loss: 0.007928190445656784
    num_agent_steps_sampled: 220000
    num_agent_steps_trained: 220000
    num_steps_sampled: 220000
    num_steps_trained: 22000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,220,3446.66,220000,-9.9413,-8.51,-10,994.13


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 221000
  custom_metrics: {}
  date: 2021-10-23_20-57-30
  done: false
  episode_len_mean: 994.17
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.941699999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 298
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2146918701035605e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.08735404262940089
          entropy_coeff: 0.009999999999999998
          kl: 0.0031313244292202097
          policy_loss: -0.03880772236734629
          total_loss: -0.030936278506285615
          vf_explained_var: -0.6057917475700378
          vf_loss: 0.008744985693030886
    num_agent_steps_sampled: 221000
    num_agent_steps_trained: 221000
    num_steps_sampled: 221000
    num_steps_trained: 221

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,221,3458.17,221000,-9.9417,-8.51,-10,994.17




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 222000
  custom_metrics: {}
  date: 2021-10-23_20-57-58
  done: false
  episode_len_mean: 992.79
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.927899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 300
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1073459350517803e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.08707411363720893
          entropy_coeff: 0.009999999999999998
          kl: 0.005608439633053624
          policy_loss: 0.10296230382389493
          total_loss: 0.1102840043604374
          vf_explained_var: -0.6058398485183716
          vf_loss: 0.008192444011931204
    num_agent_steps_sampled: 222000
    num_agent_steps_trained: 222000
    num_steps_sampled: 222000
    num_steps_trained: 222000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,222,3486.93,222000,-9.9279,-8.51,-10,992.79


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 223000
  custom_metrics: {}
  date: 2021-10-23_20-58-12
  done: false
  episode_len_mean: 992.79
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.927899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 301
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1073459350517803e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.08455717381503847
          entropy_coeff: 0.009999999999999998
          kl: 0.0021867505229440413
          policy_loss: 0.18136302381753922
          total_loss: 0.18192513883113862
          vf_explained_var: -1.0
          vf_loss: 0.0014076852912290228
    num_agent_steps_sampled: 223000
    num_agent_steps_trained: 223000
    num_steps_sampled: 223000
    num_steps_trained: 223000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,223,3500.43,223000,-9.9279,-8.51,-10,992.79


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 224000
  custom_metrics: {}
  date: 2021-10-23_20-58-24
  done: false
  episode_len_mean: 992.79
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.927899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 302
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.536729675258901e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.0873591027326054
          entropy_coeff: 0.009999999999999998
          kl: 0.0017684567781139373
          policy_loss: 0.1284034575853083
          total_loss: 0.12916328608989716
          vf_explained_var: -1.0
          vf_loss: 0.001633419837647428
    num_agent_steps_sampled: 224000
    num_agent_steps_trained: 224000
    num_steps_sampled: 224000
    num_steps_trained: 224000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,224,3512.06,224000,-9.9279,-8.51,-10,992.79


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 225000
  custom_metrics: {}
  date: 2021-10-23_20-58-35
  done: false
  episode_len_mean: 992.83
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.928299999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 303
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7683648376294506e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.08226947006252076
          entropy_coeff: 0.009999999999999998
          kl: 0.000643874299647804
          policy_loss: 0.082339070406225
          total_loss: 0.08293207147055202
          vf_explained_var: -1.0
          vf_loss: 0.0014156965849300225
    num_agent_steps_sampled: 225000
    num_agent_steps_trained: 225000
    num_steps_sampled: 225000
    num_steps_trained: 225000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,225,3523.47,225000,-9.9283,-8.51,-10,992.83


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 226000
  custom_metrics: {}
  date: 2021-10-23_20-58-46
  done: false
  episode_len_mean: 992.83
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.928299999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 304
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3841824188147253e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.07915013904372852
          entropy_coeff: 0.009999999999999998
          kl: 0.00046892094986914244
          policy_loss: 0.06348298353453477
          total_loss: 0.06359435965617498
          vf_explained_var: -1.0
          vf_loss: 0.0009028771329515924
    num_agent_steps_sampled: 226000
    num_agent_steps_trained: 226000
    num_steps_sampled: 226000
    num_steps_trained: 226000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,226,3534.83,226000,-9.9283,-8.51,-10,992.83


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 227000
  custom_metrics: {}
  date: 2021-10-23_20-58-58
  done: false
  episode_len_mean: 992.83
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.928299999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 305
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.920912094073627e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.08774302982621723
          entropy_coeff: 0.009999999999999998
          kl: 0.0010535807885591769
          policy_loss: 0.03836628266920646
          total_loss: 0.039129944062895244
          vf_explained_var: -1.0
          vf_loss: 0.0016410912412943113
    num_agent_steps_sampled: 227000
    num_agent_steps_trained: 227000
    num_steps_sampled: 227000
    num_steps_trained: 227000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,227,3546.19,227000,-9.9283,-8.51,-10,992.83


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 228000
  custom_metrics: {}
  date: 2021-10-23_20-59-09
  done: false
  episode_len_mean: 992.83
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.928299999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 306
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4604560470368133e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.08818150651123789
          entropy_coeff: 0.009999999999999998
          kl: 0.0017369368333472973
          policy_loss: -0.0014641861948702071
          total_loss: -0.0010203873324725363
          vf_explained_var: -0.86761474609375
          vf_loss: 0.0013256144334769084
    num_agent_steps_sampled: 228000
    num_agent_steps_trained: 228000
    num_steps_sampled: 228000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,228,3557.55,228000,-9.9283,-8.51,-10,992.83


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 229000
  custom_metrics: {}
  date: 2021-10-23_20-59-21
  done: false
  episode_len_mean: 992.91
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.929099999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 307
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7302280235184066e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.0789089135825634
          entropy_coeff: 0.009999999999999998
          kl: 0.0005080699670037979
          policy_loss: 0.01611444177106023
          total_loss: 0.016489391380714046
          vf_explained_var: -1.0
          vf_loss: 0.001164038703104274
    num_agent_steps_sampled: 229000
    num_agent_steps_trained: 229000
    num_steps_sampled: 229000
    num_steps_trained: 229000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,229,3568.91,229000,-9.9291,-8.51,-10,992.91


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 230000
  custom_metrics: {}
  date: 2021-10-23_20-59-32
  done: false
  episode_len_mean: 992.91
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.929099999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 308
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.651140117592033e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.08130194884207513
          entropy_coeff: 0.009999999999999998
          kl: 0.00036136252715045377
          policy_loss: 0.01906531386387845
          total_loss: 0.019390645142023762
          vf_explained_var: -1.0
          vf_loss: 0.0011383506417688397
    num_agent_steps_sampled: 230000
    num_agent_steps_trained: 230000
    num_steps_sampled: 230000
    num_steps_trained: 230000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,230,3580.17,230000,-9.9291,-8.51,-10,992.91


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 231000
  custom_metrics: {}
  date: 2021-10-23_20-59-43
  done: false
  episode_len_mean: 992.95
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.929499999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 309
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3255700587960166e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.07820437558823162
          entropy_coeff: 0.009999999999999998
          kl: 0.00043172879539067963
          policy_loss: 0.0188524578180578
          total_loss: 0.019059457474698623
          vf_explained_var: -1.0
          vf_loss: 0.0009890437436600526
    num_agent_steps_sampled: 231000
    num_agent_steps_trained: 231000
    num_steps_sampled: 231000
    num_steps_trained: 231000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,231,3591.76,231000,-9.9295,-8.51,-10,992.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 232000
  custom_metrics: {}
  date: 2021-10-23_20-59-55
  done: false
  episode_len_mean: 994.2
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.941999999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 310
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1627850293980083e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.08327313380108939
          entropy_coeff: 0.009999999999999998
          kl: 0.0011554515875306808
          policy_loss: 0.021094508613977166
          total_loss: 0.021142233639127678
          vf_explained_var: -1.0
          vf_loss: 0.0008804560302653247
    num_agent_steps_sampled: 232000
    num_agent_steps_trained: 232000
    num_steps_sampled: 232000
    num_steps_trained: 232000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,232,3603.24,232000,-9.942,-8.51,-10,994.2


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 233000
  custom_metrics: {}
  date: 2021-10-23_21-00-06
  done: false
  episode_len_mean: 994.16
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.941599999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 311
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0813925146990042e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.07957473446925481
          entropy_coeff: 0.009999999999999998
          kl: 0.000305917647347087
          policy_loss: 0.04529339927766058
          total_loss: 0.04520043440990978
          vf_explained_var: -1.0
          vf_loss: 0.0007027829971371426
    num_agent_steps_sampled: 233000
    num_agent_steps_trained: 233000
    num_steps_sampled: 233000
    num_steps_trained: 233000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,233,3614.59,233000,-9.9416,-8.51,-10,994.16


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 234000
  custom_metrics: {}
  date: 2021-10-23_21-00-18
  done: false
  episode_len_mean: 994.12
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.941199999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 312
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.406962573495021e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.0849320902592606
          entropy_coeff: 0.009999999999999998
          kl: 0.0010655482102366579
          policy_loss: 0.04031379409134388
          total_loss: 0.04047763684971465
          vf_explained_var: -1.0
          vf_loss: 0.0010131627411788537
    num_agent_steps_sampled: 234000
    num_agent_steps_trained: 234000
    num_steps_sampled: 234000
    num_steps_trained: 234000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,234,3626.54,234000,-9.9412,-8.51,-10,994.12


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 235000
  custom_metrics: {}
  date: 2021-10-23_21-00-30
  done: false
  episode_len_mean: 994.12
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.941199999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 313
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7034812867475104e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.083300904598501
          entropy_coeff: 0.009999999999999998
          kl: 0.0016698866177143321
          policy_loss: 0.033894763679967986
          total_loss: 0.033975521412988506
          vf_explained_var: -0.9903243780136108
          vf_loss: 0.000913766649318859
    num_agent_steps_sampled: 235000
    num_agent_steps_trained: 235000
    num_steps_sampled: 235000
    num_steps_trained: 235000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,235,3637.9,235000,-9.9412,-8.51,-10,994.12


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 236000
  custom_metrics: {}
  date: 2021-10-23_21-00-41
  done: false
  episode_len_mean: 994.12
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.941199999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 314
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3517406433737552e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.07761537233988444
          entropy_coeff: 0.009999999999999998
          kl: 0.0008499057691854735
          policy_loss: 0.050303292667700186
          total_loss: 0.05038716637839873
          vf_explained_var: -1.0
          vf_loss: 0.0008600270433817059
    num_agent_steps_sampled: 236000
    num_agent_steps_trained: 236000
    num_steps_sampled: 236000
    num_steps_trained: 236000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,236,3649.32,236000,-9.9412,-8.51,-10,994.12


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 237000
  custom_metrics: {}
  date: 2021-10-23_21-00-52
  done: false
  episode_len_mean: 994.13
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.941299999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 315
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.758703216868776e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.08229314792487356
          entropy_coeff: 0.009999999999999998
          kl: 0.0013398094272512632
          policy_loss: 0.0756856123606364
          total_loss: 0.0757325653400686
          vf_explained_var: -1.0
          vf_loss: 0.0008698842986228152
    num_agent_steps_sampled: 237000
    num_agent_steps_trained: 237000
    num_steps_sampled: 237000
    num_steps_trained: 237000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,237,3660.65,237000,-9.9413,-8.51,-10,994.13


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 238000
  custom_metrics: {}
  date: 2021-10-23_21-01-04
  done: false
  episode_len_mean: 994.13
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.941299999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 316
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.379351608434388e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.08478353958990839
          entropy_coeff: 0.009999999999999998
          kl: 0.0011114802851807325
          policy_loss: 0.04776963790257772
          total_loss: 0.047697386766473455
          vf_explained_var: -1.0
          vf_loss: 0.000775584188522771
    num_agent_steps_sampled: 238000
    num_agent_steps_trained: 238000
    num_steps_sampled: 238000
    num_steps_trained: 238000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,238,3671.98,238000,-9.9413,-8.51,-10,994.13


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 239000
  custom_metrics: {}
  date: 2021-10-23_21-01-15
  done: false
  episode_len_mean: 994.09
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.940899999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 317
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.689675804217194e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.08922625887725089
          entropy_coeff: 0.009999999999999998
          kl: 0.0016932382856288718
          policy_loss: 0.028868897114362982
          total_loss: 0.028847697811822098
          vf_explained_var: -1.0
          vf_loss: 0.0008710633180776818
    num_agent_steps_sampled: 239000
    num_agent_steps_trained: 239000
    num_steps_sampled: 239000
    num_steps_trained: 239000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,239,3683.3,239000,-9.9409,-8.51,-10,994.09


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 240000
  custom_metrics: {}
  date: 2021-10-23_21-01-27
  done: false
  episode_len_mean: 994.05
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.940499999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 318
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.44837902108597e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.08476586267352104
          entropy_coeff: 0.009999999999999998
          kl: 0.00148690591158811
          policy_loss: 0.05177186762707101
          total_loss: 0.051668375368333526
          vf_explained_var: -1.0
          vf_loss: 0.0007441663749179699
    num_agent_steps_sampled: 240000
    num_agent_steps_trained: 240000
    num_steps_sampled: 240000
    num_steps_trained: 240000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,240,3694.64,240000,-9.9405,-8.51,-10,994.05


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 241000
  custom_metrics: {}
  date: 2021-10-23_21-01-38
  done: false
  episode_len_mean: 994.01
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.940099999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 319
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.224189510542985e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.08168865144252777
          entropy_coeff: 0.009999999999999998
          kl: 0.0006122474157665338
          policy_loss: 0.026858636426428953
          total_loss: 0.026545979330937067
          vf_explained_var: -1.0
          vf_loss: 0.0005042290767758256
    num_agent_steps_sampled: 241000
    num_agent_steps_trained: 241000
    num_steps_sampled: 241000
    num_steps_trained: 241000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,241,3705.97,241000,-9.9401,-8.51,-10,994.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 242000
  custom_metrics: {}
  date: 2021-10-23_21-01-49
  done: false
  episode_len_mean: 994.04
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.940399999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 320
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1120947552714925e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.08226304567522473
          entropy_coeff: 0.009999999999999998
          kl: 0.0007635790002596979
          policy_loss: 0.046451087171832724
          total_loss: 0.046324153078926934
          vf_explained_var: -1.0
          vf_loss: 0.0006956961413379758
    num_agent_steps_sampled: 242000
    num_agent_steps_trained: 242000
    num_steps_sampled: 242000
    num_steps_trained: 242000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,242,3717.45,242000,-9.9404,-8.51,-10,994.04


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 243000
  custom_metrics: {}
  date: 2021-10-23_21-02-00
  done: false
  episode_len_mean: 994.0
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939999999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 321
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0560473776357462e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.08217297908332613
          entropy_coeff: 0.009999999999999998
          kl: 0.001546124219506358
          policy_loss: 0.05555142917566829
          total_loss: 0.055367483902308674
          vf_explained_var: -1.0
          vf_loss: 0.0006377842804391144
    num_agent_steps_sampled: 243000
    num_agent_steps_trained: 243000
    num_steps_sampled: 243000
    num_steps_trained: 243000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,243,3728.39,243000,-9.94,-8.51,-10,994


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 244000
  custom_metrics: {}
  date: 2021-10-23_21-02-10
  done: false
  episode_len_mean: 993.96
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939599999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 322
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.280236888178731e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.08245055948694548
          entropy_coeff: 0.009999999999999998
          kl: 0.0007816432140114355
          policy_loss: 0.03814706260131465
          total_loss: 0.03795031719944543
          vf_explained_var: -1.0
          vf_loss: 0.0006277600678408311
    num_agent_steps_sampled: 244000
    num_agent_steps_trained: 244000
    num_steps_sampled: 244000
    num_steps_trained: 244000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,244,3738.38,244000,-9.9396,-8.51,-10,993.96


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 245000
  custom_metrics: {}
  date: 2021-10-23_21-02-22
  done: false
  episode_len_mean: 993.96
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939599999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 323
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6401184440893656e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.08542710451616181
          entropy_coeff: 0.009999999999999998
          kl: 0.0015878482702343414
          policy_loss: 0.03702300899765558
          total_loss: 0.037129088087628284
          vf_explained_var: -1.0
          vf_loss: 0.0009603506749651084
    num_agent_steps_sampled: 245000
    num_agent_steps_trained: 245000
    num_steps_sampled: 245000
    num_steps_trained: 245000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,245,3749.55,245000,-9.9396,-8.51,-10,993.96


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 246000
  custom_metrics: {}
  date: 2021-10-23_21-02-33
  done: false
  episode_len_mean: 993.92
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.93919999999983
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 324
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3200592220446828e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.08085559863183234
          entropy_coeff: 0.009999999999999998
          kl: 0.0013070499385422509
          policy_loss: 0.07912533804774284
          total_loss: 0.07885995879769325
          vf_explained_var: -1.0
          vf_loss: 0.000543176959359294
    num_agent_steps_sampled: 246000
    num_agent_steps_trained: 246000
    num_steps_sampled: 246000
    num_steps_trained: 246000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,246,3761.21,246000,-9.9392,-8.51,-10,993.92


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 247000
  custom_metrics: {}
  date: 2021-10-23_21-02-45
  done: false
  episode_len_mean: 993.92
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939199999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 325
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.600296110223414e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.08020829045110278
          entropy_coeff: 0.009999999999999998
          kl: 0.00036586422219342136
          policy_loss: 0.05390699033935865
          total_loss: 0.05356196703182326
          vf_explained_var: -1.0
          vf_loss: 0.00045705906846301835
    num_agent_steps_sampled: 247000
    num_agent_steps_trained: 247000
    num_steps_sampled: 247000
    num_steps_trained: 247000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,247,3772.64,247000,-9.9392,-8.51,-10,993.92


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 248000
  custom_metrics: {}
  date: 2021-10-23_21-02-56
  done: false
  episode_len_mean: 993.92
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939199999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 326
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.300148055111707e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.08334111149112383
          entropy_coeff: 0.009999999999999998
          kl: 0.001205674546589661
          policy_loss: 0.05543832994169659
          total_loss: 0.05528902127924892
          vf_explained_var: -0.8921642899513245
          vf_loss: 0.0006841028041283911
    num_agent_steps_sampled: 248000
    num_agent_steps_trained: 248000
    num_steps_sampled: 248000
    num_steps_trained: 248000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,248,3784.15,248000,-9.9392,-8.51,-10,993.92


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 249000
  custom_metrics: {}
  date: 2021-10-23_21-03-08
  done: false
  episode_len_mean: 993.88
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938799999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 327
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6500740275558535e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.08782147251897388
          entropy_coeff: 0.009999999999999998
          kl: 0.0009313006177156543
          policy_loss: 0.04348832513723108
          total_loss: 0.04312781846771638
          vf_explained_var: -1.0
          vf_loss: 0.0005177083085679139
    num_agent_steps_sampled: 249000
    num_agent_steps_trained: 249000
    num_steps_sampled: 249000
    num_steps_trained: 249000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,249,3795.94,249000,-9.9388,-8.51,-10,993.88


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 250000
  custom_metrics: {}
  date: 2021-10-23_21-03-21
  done: false
  episode_len_mean: 993.88
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938799999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 328
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.250370137779268e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.09372077311078707
          entropy_coeff: 0.009999999999999998
          kl: 0.0015779595440512317
          policy_loss: 0.010083279344770643
          total_loss: 0.009899836892469063
          vf_explained_var: -1.0
          vf_loss: 0.0007537650694656703
    num_agent_steps_sampled: 250000
    num_agent_steps_trained: 250000
    num_steps_sampled: 250000
    num_steps_trained: 250000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,250,3808.45,250000,-9.9388,-8.51,-10,993.88


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 251000
  custom_metrics: {}
  date: 2021-10-23_21-03-33
  done: false
  episode_len_mean: 993.88
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938799999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 329
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.125185068889634e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.08279410418536928
          entropy_coeff: 0.009999999999999998
          kl: 0.0009367943825458901
          policy_loss: 0.028665414162807993
          total_loss: 0.028295835562878183
          vf_explained_var: -1.0
          vf_loss: 0.00045836234833889953
    num_agent_steps_sampled: 251000
    num_agent_steps_trained: 251000
    num_steps_sampled: 251000
    num_steps_trained: 251000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,251,3821.09,251000,-9.9388,-8.51,-10,993.88




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 252000
  custom_metrics: {}
  date: 2021-10-23_21-04-02
  done: false
  episode_len_mean: 992.55
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.925499999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 330
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.062592534444817e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.0848969670633475
          entropy_coeff: 0.009999999999999998
          kl: 0.00505976343799072
          policy_loss: -0.016162612703111437
          total_loss: -0.0062780768507056765
          vf_explained_var: -0.6182386875152588
          vf_loss: 0.010733505555092254
    num_agent_steps_sampled: 252000
    num_agent_steps_trained: 252000
    num_steps_sampled: 252000
    num_steps_trained: 25200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,252,3849.4,252000,-9.9255,-8.51,-10,992.55


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 253000
  custom_metrics: {}
  date: 2021-10-23_21-04-15
  done: false
  episode_len_mean: 992.51
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.925099999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 331
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.062592534444817e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.0798502195212576
          entropy_coeff: 0.009999999999999998
          kl: 0.0021327796049364326
          policy_loss: -0.027068841871288087
          total_loss: -0.017974157631397248
          vf_explained_var: -0.6380021572113037
          vf_loss: 0.009893185631113334
    num_agent_steps_sampled: 253000
    num_agent_steps_trained: 253000
    num_steps_sampled: 253000
    num_steps_trained: 2530

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,253,3863,253000,-9.9251,-8.51,-10,992.51


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 254000
  custom_metrics: {}
  date: 2021-10-23_21-04-27
  done: false
  episode_len_mean: 992.47
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.924699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 332
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0312962672224084e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.0801271651354101
          entropy_coeff: 0.009999999999999998
          kl: 0.0030921195090437927
          policy_loss: -0.008049673048986329
          total_loss: 0.0006851755082607269
          vf_explained_var: -0.6240313649177551
          vf_loss: 0.009536115857513829
    num_agent_steps_sampled: 254000
    num_agent_steps_trained: 254000
    num_steps_sampled: 254000
    num_steps_trained: 254

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,254,3874.92,254000,-9.9247,-8.51,-10,992.47


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 255000
  custom_metrics: {}
  date: 2021-10-23_21-04-39
  done: false
  episode_len_mean: 992.44
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.924399999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 333
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.156481336112042e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.08451000278194745
          entropy_coeff: 0.009999999999999998
          kl: 0.00252176186380287
          policy_loss: -0.012836989760398865
          total_loss: -0.0045703091555171545
          vf_explained_var: -0.6500861048698425
          vf_loss: 0.00911177982796087
    num_agent_steps_sampled: 255000
    num_agent_steps_trained: 255000
    num_steps_sampled: 255000
    num_steps_trained: 25500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,255,3886.42,255000,-9.9244,-8.51,-10,992.44


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 256000
  custom_metrics: {}
  date: 2021-10-23_21-04-50
  done: false
  episode_len_mean: 992.47
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.924699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 334
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.578240668056021e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.08342535884843932
          entropy_coeff: 0.009999999999999998
          kl: 0.0021358786196085727
          policy_loss: -0.022350994911458758
          total_loss: -0.014761928468942642
          vf_explained_var: -0.6451952457427979
          vf_loss: 0.008423321053204645
    num_agent_steps_sampled: 256000
    num_agent_steps_trained: 256000
    num_steps_sampled: 256000
    num_steps_trained: 256

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,256,3897.68,256000,-9.9247,-8.51,-10,992.47


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 257000
  custom_metrics: {}
  date: 2021-10-23_21-05-01
  done: false
  episode_len_mean: 992.47
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.924699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 335
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2891203340280106e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.08175632928808531
          entropy_coeff: 0.009999999999999998
          kl: 0.0019298010298775302
          policy_loss: -0.010989198585351308
          total_loss: -0.0034116978446642556
          vf_explained_var: -0.6611591577529907
          vf_loss: 0.008395067653166027
    num_agent_steps_sampled: 257000
    num_agent_steps_trained: 257000
    num_steps_sampled: 257000
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,257,3909.18,257000,-9.9247,-8.51,-10,992.47


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 258000
  custom_metrics: {}
  date: 2021-10-23_21-05-13
  done: false
  episode_len_mean: 992.48
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.924799999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 336
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.445601670140053e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.08351033876339595
          entropy_coeff: 0.009999999999999998
          kl: 0.0032384670882796246
          policy_loss: -0.03229884488715066
          total_loss: -0.02486774747570356
          vf_explained_var: -0.649695634841919
          vf_loss: 0.008266197898127657
    num_agent_steps_sampled: 258000
    num_agent_steps_trained: 258000
    num_steps_sampled: 258000
    num_steps_trained: 258000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,258,3920.58,258000,-9.9248,-8.51,-10,992.48


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 259000
  custom_metrics: {}
  date: 2021-10-23_21-05-25
  done: false
  episode_len_mean: 992.48
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.924799999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 337
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2228008350700264e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.08224412525693575
          entropy_coeff: 0.009999999999999998
          kl: 0.003195649755394293
          policy_loss: -0.026471385856469472
          total_loss: -0.018848686748080785
          vf_explained_var: -0.6674529314041138
          vf_loss: 0.008445141450566653
    num_agent_steps_sampled: 259000
    num_agent_steps_trained: 259000
    num_steps_sampled: 259000
    num_steps_trained: 259

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,259,3932.32,259000,-9.9248,-8.51,-10,992.48


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 260000
  custom_metrics: {}
  date: 2021-10-23_21-05-37
  done: false
  episode_len_mean: 992.41
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.924099999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 338
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6114004175350132e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.08660331865151723
          entropy_coeff: 0.009999999999999998
          kl: 0.004401201457302603
          policy_loss: -0.0176169635521041
          total_loss: -0.010228357712427775
          vf_explained_var: -0.6293471455574036
          vf_loss: 0.008254637657147314
    num_agent_steps_sampled: 260000
    num_agent_steps_trained: 260000
    num_steps_sampled: 260000
    num_steps_trained: 26000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,260,3944.13,260000,-9.9241,-8.51,-10,992.41


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 261000
  custom_metrics: {}
  date: 2021-10-23_21-05-49
  done: false
  episode_len_mean: 992.49
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.924899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 339
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.057002087675066e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.08561006519529554
          entropy_coeff: 0.009999999999999998
          kl: 0.004613189156063729
          policy_loss: -0.0288247003323502
          total_loss: -0.02175771097342173
          vf_explained_var: -0.34549152851104736
          vf_loss: 0.00792308897653129
    num_agent_steps_sampled: 261000
    num_agent_steps_trained: 261000
    num_steps_sampled: 261000
    num_steps_trained: 261000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,261,3956.78,261000,-9.9249,-8.51,-10,992.49


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 262000
  custom_metrics: {}
  date: 2021-10-23_21-06-01
  done: false
  episode_len_mean: 993.87
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938699999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 340
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.028501043837533e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.08775380187564426
          entropy_coeff: 0.009999999999999998
          kl: 0.0027378737926483155
          policy_loss: -0.04072999308506648
          total_loss: -0.03381041387716929
          vf_explained_var: -0.6532634496688843
          vf_loss: 0.007797116586718605
    num_agent_steps_sampled: 262000
    num_agent_steps_trained: 262000
    num_steps_sampled: 262000
    num_steps_trained: 26200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,262,3968.92,262000,-9.9387,-8.51,-10,993.87


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 263000
  custom_metrics: {}
  date: 2021-10-23_21-06-14
  done: false
  episode_len_mean: 993.87
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938699999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 341
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0142505219187665e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.08618083977037005
          entropy_coeff: 0.009999999999999998
          kl: 0.002924855383268247
          policy_loss: -0.036219486097494764
          total_loss: -0.02939577880832884
          vf_explained_var: -0.6479942202568054
          vf_loss: 0.007685514424358391
    num_agent_steps_sampled: 263000
    num_agent_steps_trained: 263000
    num_steps_sampled: 263000
    num_steps_trained: 2630

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,263,3981.31,263000,-9.9387,-8.51,-10,993.87


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 264000
  custom_metrics: {}
  date: 2021-10-23_21-06-25
  done: false
  episode_len_mean: 993.91
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939099999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 342
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0071252609593832e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.0828174321187867
          entropy_coeff: 0.009999999999999998
          kl: 0.003259977301220513
          policy_loss: -0.04745126499070062
          total_loss: -0.04019213517506917
          vf_explained_var: -0.6606536507606506
          vf_loss: 0.008087300401853605
    num_agent_steps_sampled: 264000
    num_agent_steps_trained: 264000
    num_steps_sampled: 264000
    num_steps_trained: 264000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,264,3992.93,264000,-9.9391,-8.51,-10,993.91


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 265000
  custom_metrics: {}
  date: 2021-10-23_21-06-37
  done: false
  episode_len_mean: 993.91
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939099999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 343
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.035626304796916e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.10439881359537442
          entropy_coeff: 0.009999999999999998
          kl: 0.005626301938253972
          policy_loss: -0.028803529010878667
          total_loss: -0.02124460438887278
          vf_explained_var: -0.6553519368171692
          vf_loss: 0.008602912469197893
    num_agent_steps_sampled: 265000
    num_agent_steps_trained: 265000
    num_steps_sampled: 265000
    num_steps_trained: 26500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,265,4004.44,265000,-9.9391,-8.51,-10,993.91


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 266000
  custom_metrics: {}
  date: 2021-10-23_21-06-48
  done: false
  episode_len_mean: 993.87
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938699999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 344
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.035626304796916e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.08652021868361368
          entropy_coeff: 0.009999999999999998
          kl: 0.0038104886965205273
          policy_loss: -0.024422819746865165
          total_loss: -0.017825992736551496
          vf_explained_var: -0.621018648147583
          vf_loss: 0.007462031027534977
    num_agent_steps_sampled: 266000
    num_agent_steps_trained: 266000
    num_steps_sampled: 266000
    num_steps_trained: 2660

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,266,4015.85,266000,-9.9387,-8.51,-10,993.87


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 267000
  custom_metrics: {}
  date: 2021-10-23_21-07-00
  done: false
  episode_len_mean: 993.83
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938299999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 345
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.517813152398458e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.07968754726979467
          entropy_coeff: 0.009999999999999998
          kl: 0.00297493338261524
          policy_loss: -0.03581805494096544
          total_loss: -0.029440548022588095
          vf_explained_var: -0.6186951398849487
          vf_loss: 0.007174387318728905
    num_agent_steps_sampled: 267000
    num_agent_steps_trained: 267000
    num_steps_sampled: 267000
    num_steps_trained: 267000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,267,4027.4,267000,-9.9383,-8.51,-10,993.83


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 268000
  custom_metrics: {}
  date: 2021-10-23_21-07-12
  done: false
  episode_len_mean: 993.82
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938199999999831
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 346
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.258906576199229e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.08460462854968177
          entropy_coeff: 0.009999999999999998
          kl: 0.005156421461530651
          policy_loss: -0.015402500165833367
          total_loss: -0.008683156304889255
          vf_explained_var: -0.6229177713394165
          vf_loss: 0.007565388884783412
    num_agent_steps_sampled: 268000
    num_agent_steps_trained: 268000
    num_steps_sampled: 268000
    num_steps_trained: 2680

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,268,4039.67,268000,-9.9382,-8.51,-10,993.82


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 269000
  custom_metrics: {}
  date: 2021-10-23_21-07-24
  done: false
  episode_len_mean: 993.86
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938599999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 347
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.258906576199229e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.08524307741059198
          entropy_coeff: 0.009999999999999998
          kl: 0.005396350259737422
          policy_loss: -0.008472622434298197
          total_loss: -0.0019806154900126986
          vf_explained_var: -0.6384806036949158
          vf_loss: 0.007344435850003114
    num_agent_steps_sampled: 269000
    num_agent_steps_trained: 269000
    num_steps_sampled: 269000
    num_steps_trained: 269

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,269,4051.46,269000,-9.9386,-8.51,-10,993.86


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 270000
  custom_metrics: {}
  date: 2021-10-23_21-07-35
  done: false
  episode_len_mean: 993.9
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938999999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 348
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.258906576199229e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.08515900828772122
          entropy_coeff: 0.009999999999999998
          kl: 0.003496026942351212
          policy_loss: -0.025616247951984406
          total_loss: -0.01908138824833764
          vf_explained_var: -0.6359041929244995
          vf_loss: 0.00738645126305831
    num_agent_steps_sampled: 270000
    num_agent_steps_trained: 270000
    num_steps_sampled: 270000
    num_steps_trained: 270000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,270,4062.03,270000,-9.939,-8.51,-10,993.9


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 271000
  custom_metrics: {}
  date: 2021-10-23_21-07-46
  done: false
  episode_len_mean: 993.9
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938999999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 349
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.294532880996145e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.06018395245903068
          entropy_coeff: 0.009999999999999998
          kl: 0.005382225452922285
          policy_loss: 0.0025361433625221254
          total_loss: 0.009240606758329604
          vf_explained_var: -0.3435811400413513
          vf_loss: 0.0073062978361526296
    num_agent_steps_sampled: 271000
    num_agent_steps_trained: 271000
    num_steps_sampled: 271000
    num_steps_trained: 27100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,271,4073.02,271000,-9.939,-8.51,-10,993.9


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 272000
  custom_metrics: {}
  date: 2021-10-23_21-07-57
  done: false
  episode_len_mean: 993.86
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938599999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 350
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.294532880996145e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.09068523339099355
          entropy_coeff: 0.009999999999999998
          kl: 0.01076341818811165
          policy_loss: -0.004796371857325236
          total_loss: 0.0018246061272091335
          vf_explained_var: -0.3958349823951721
          vf_loss: 0.007527833764389571
    num_agent_steps_sampled: 272000
    num_agent_steps_trained: 272000
    num_steps_sampled: 272000
    num_steps_trained: 27200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,272,4084.64,272000,-9.9386,-8.51,-10,993.86


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 273000
  custom_metrics: {}
  date: 2021-10-23_21-08-09
  done: false
  episode_len_mean: 993.9
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938999999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 351
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.294532880996145e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.09847625063525306
          entropy_coeff: 0.009999999999999998
          kl: 0.006360732929574119
          policy_loss: -0.022273014816972945
          total_loss: -0.015318389236927032
          vf_explained_var: -0.6309231519699097
          vf_loss: 0.007939388018308414
    num_agent_steps_sampled: 273000
    num_agent_steps_trained: 273000
    num_steps_sampled: 273000
    num_steps_trained: 27300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,273,4096.18,273000,-9.939,-8.51,-10,993.9


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 274000
  custom_metrics: {}
  date: 2021-10-23_21-08-20
  done: false
  episode_len_mean: 993.86
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938599999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 352
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.294532880996145e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.08549528991182645
          entropy_coeff: 0.009999999999999998
          kl: 0.003807428256712026
          policy_loss: -0.03748759991592831
          total_loss: -0.03139177742931578
          vf_explained_var: -0.5936582684516907
          vf_loss: 0.006950775558167758
    num_agent_steps_sampled: 274000
    num_agent_steps_trained: 274000
    num_steps_sampled: 274000
    num_steps_trained: 274000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,274,4107.71,274000,-9.9386,-8.51,-10,993.86


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 275000
  custom_metrics: {}
  date: 2021-10-23_21-08-32
  done: false
  episode_len_mean: 993.86
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938599999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 353
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1472664404980726e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.08203269516428312
          entropy_coeff: 0.009999999999999998
          kl: 0.003888591095003196
          policy_loss: -0.04455780651834276
          total_loss: -0.0383083936240938
          vf_explained_var: -0.6054968237876892
          vf_loss: 0.007069742267615058
    num_agent_steps_sampled: 275000
    num_agent_steps_trained: 275000
    num_steps_sampled: 275000
    num_steps_trained: 275000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,275,4119.07,275000,-9.9386,-8.51,-10,993.86


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 276000
  custom_metrics: {}
  date: 2021-10-23_21-08-43
  done: false
  episode_len_mean: 993.86
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.938599999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 354
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5736332202490363e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.08791094000140826
          entropy_coeff: 0.009999999999999998
          kl: 0.005058322978180109
          policy_loss: -0.049118958579169376
          total_loss: -0.04247120800945494
          vf_explained_var: -0.6248731017112732
          vf_loss: 0.007526860822690651
    num_agent_steps_sampled: 276000
    num_agent_steps_trained: 276000
    num_steps_sampled: 276000
    num_steps_trained: 2760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,276,4130.6,276000,-9.9386,-8.51,-10,993.86


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 277000
  custom_metrics: {}
  date: 2021-10-23_21-08-55
  done: false
  episode_len_mean: 993.94
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939399999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 355
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5736332202490363e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.09009846266773013
          entropy_coeff: 0.009999999999999998
          kl: 0.0054526164222301705
          policy_loss: -0.0378801382250256
          total_loss: -0.03144174681769477
          vf_explained_var: -0.6152195334434509
          vf_loss: 0.007339371471122528
    num_agent_steps_sampled: 277000
    num_agent_steps_trained: 277000
    num_steps_sampled: 277000
    num_steps_trained: 27700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,277,4141.91,277000,-9.9394,-8.51,-10,993.94


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 278000
  custom_metrics: {}
  date: 2021-10-23_21-09-06
  done: false
  episode_len_mean: 993.94
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939399999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 356
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5736332202490363e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.09011569991707802
          entropy_coeff: 0.009999999999999998
          kl: 0.003409152743147893
          policy_loss: -0.03279853165149689
          total_loss: -0.02699541300535202
          vf_explained_var: -0.5863016247749329
          vf_loss: 0.006704278787623884
    num_agent_steps_sampled: 278000
    num_agent_steps_trained: 278000
    num_steps_sampled: 278000
    num_steps_trained: 27800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,278,4153.33,278000,-9.9394,-8.51,-10,993.94


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 279000
  custom_metrics: {}
  date: 2021-10-23_21-09-18
  done: false
  episode_len_mean: 993.98
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.939799999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 357
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.868166101245182e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.08386005361874899
          entropy_coeff: 0.009999999999999998
          kl: 0.004026409919606522
          policy_loss: -0.04898175514406628
          total_loss: -0.04235023773378796
          vf_explained_var: -0.3599500060081482
          vf_loss: 0.007470117024301241
    num_agent_steps_sampled: 279000
    num_agent_steps_trained: 279000
    num_steps_sampled: 279000
    num_steps_trained: 279000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,279,4164.79,279000,-9.9398,-8.51,-10,993.98


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 280000
  custom_metrics: {}
  date: 2021-10-23_21-09-29
  done: false
  episode_len_mean: 994.02
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.940199999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 358
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.934083050622591e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.09023367249303393
          entropy_coeff: 0.009999999999999998
          kl: 0.004088856289551283
          policy_loss: -0.04106204294496112
          total_loss: -0.03495106448729833
          vf_explained_var: -0.6157262921333313
          vf_loss: 0.007013314062108597
    num_agent_steps_sampled: 280000
    num_agent_steps_trained: 280000
    num_steps_sampled: 280000
    num_steps_trained: 280000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,280,4176.25,280000,-9.9402,-8.51,-10,994.02


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 281000
  custom_metrics: {}
  date: 2021-10-23_21-09-42
  done: false
  episode_len_mean: 994.02
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.940199999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 359
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9670415253112954e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.0898866057395935
          entropy_coeff: 0.009999999999999998
          kl: 0.005430299136787653
          policy_loss: -0.025595200724071926
          total_loss: -0.019461101790269216
          vf_explained_var: -0.6213706135749817
          vf_loss: 0.00703296628230924
    num_agent_steps_sampled: 281000
    num_agent_steps_trained: 281000
    num_steps_sampled: 281000
    num_steps_trained: 28100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,281,4188.82,281000,-9.9402,-8.51,-10,994.02




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 282000
  custom_metrics: {}
  date: 2021-10-23_21-10-11
  done: false
  episode_len_mean: 992.6
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.925999999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 360
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9670415253112954e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.09315253736244308
          entropy_coeff: 0.009999999999999998
          kl: 0.007022526679793373
          policy_loss: -0.04243249131573571
          total_loss: -0.0331381791167789
          vf_explained_var: -0.3036552667617798
          vf_loss: 0.010225835751690384
    num_agent_steps_sampled: 282000
    num_agent_steps_trained: 282000
    num_steps_sampled: 282000
    num_steps_trained: 282000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,282,4217.85,282000,-9.926,-8.51,-10,992.6


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 283000
  custom_metrics: {}
  date: 2021-10-23_21-10-24
  done: false
  episode_len_mean: 992.6
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.925999999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 361
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9670415253112954e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.09962984927826457
          entropy_coeff: 0.009999999999999998
          kl: 0.013939617726848357
          policy_loss: -0.03224762363566293
          total_loss: -0.024369485014014775
          vf_explained_var: -0.34359419345855713
          vf_loss: 0.008874434958367297
    num_agent_steps_sampled: 283000
    num_agent_steps_trained: 283000
    num_steps_sampled: 283000
    num_steps_trained: 2830

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,283,4231.33,283000,-9.926,-8.51,-10,992.6


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 284000
  custom_metrics: {}
  date: 2021-10-23_21-10-36
  done: false
  episode_len_mean: 992.56
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.925599999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 362
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9670415253112954e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.08064178004860878
          entropy_coeff: 0.009999999999999998
          kl: 0.00407933540635794
          policy_loss: -0.054830118517080945
          total_loss: -0.047157884140809375
          vf_explained_var: -0.5158957839012146
          vf_loss: 0.00847865497150148
    num_agent_steps_sampled: 284000
    num_agent_steps_trained: 284000
    num_steps_sampled: 284000
    num_steps_trained: 28400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,284,4242.88,284000,-9.9256,-8.51,-10,992.56


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 285000
  custom_metrics: {}
  date: 2021-10-23_21-10-47
  done: false
  episode_len_mean: 992.56
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.925599999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 363
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.835207626556477e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.08367696992225117
          entropy_coeff: 0.009999999999999998
          kl: 0.005272162137811796
          policy_loss: -0.049653248820039964
          total_loss: -0.041793014026350446
          vf_explained_var: -0.5127847790718079
          vf_loss: 0.008697000184070526
    num_agent_steps_sampled: 285000
    num_agent_steps_trained: 285000
    num_steps_sampled: 285000
    num_steps_trained: 2850

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,285,4254.41,285000,-9.9256,-8.51,-10,992.56


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 286000
  custom_metrics: {}
  date: 2021-10-23_21-10-59
  done: false
  episode_len_mean: 992.56
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.925599999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 364
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.835207626556477e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.16679843705561426
          entropy_coeff: 0.009999999999999998
          kl: 0.028709960252874426
          policy_loss: -0.05361818455987506
          total_loss: -0.04647663301891751
          vf_explained_var: -0.658913791179657
          vf_loss: 0.008809534907858406
    num_agent_steps_sampled: 286000
    num_agent_steps_trained: 286000
    num_steps_sampled: 286000
    num_steps_trained: 286000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,286,4265.94,286000,-9.9256,-8.51,-10,992.56


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 287000
  custom_metrics: {}
  date: 2021-10-23_21-11-10
  done: false
  episode_len_mean: 992.52
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.925199999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 365
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.475281143983471e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.07976168460316128
          entropy_coeff: 0.009999999999999998
          kl: 0.004215647924380997
          policy_loss: -0.03982136771082878
          total_loss: -0.031096679303381176
          vf_explained_var: -0.3318208158016205
          vf_loss: 0.009522307004469136
    num_agent_steps_sampled: 287000
    num_agent_steps_trained: 287000
    num_steps_sampled: 287000
    num_steps_trained: 28700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,287,4277.33,287000,-9.9252,-8.51,-10,992.52


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 288000
  custom_metrics: {}
  date: 2021-10-23_21-11-22
  done: false
  episode_len_mean: 992.52
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.925199999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 366
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.376405719917355e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.07752519701090124
          entropy_coeff: 0.009999999999999998
          kl: 0.0064379378157253895
          policy_loss: -0.06425459086894988
          total_loss: -0.055916172762711845
          vf_explained_var: -0.45512399077415466
          vf_loss: 0.009113670095878964
    num_agent_steps_sampled: 288000
    num_agent_steps_trained: 288000
    num_steps_sampled: 288000
    num_steps_trained: 288

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,288,4288.88,288000,-9.9252,-8.51,-10,992.52


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 289000
  custom_metrics: {}
  date: 2021-10-23_21-11-33
  done: false
  episode_len_mean: 992.56
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.925599999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 367
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.376405719917355e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.07527972422540188
          entropy_coeff: 0.009999999999999998
          kl: 0.003994222007329679
          policy_loss: -0.05662210020754072
          total_loss: -0.048062447706858316
          vf_explained_var: -0.6574019193649292
          vf_loss: 0.009312454413156956
    num_agent_steps_sampled: 289000
    num_agent_steps_trained: 289000
    num_steps_sampled: 289000
    num_steps_trained: 28900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,289,4300.33,289000,-9.9256,-8.51,-10,992.56


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 290000
  custom_metrics: {}
  date: 2021-10-23_21-11-45
  done: false
  episode_len_mean: 992.56
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.925599999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 368
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.688202859958677e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.06743122670385572
          entropy_coeff: 0.009999999999999998
          kl: 0.00199063186494944
          policy_loss: -0.05348800586329566
          total_loss: -0.04510432051287757
          vf_explained_var: -0.6315310001373291
          vf_loss: 0.009058001484178627
    num_agent_steps_sampled: 290000
    num_agent_steps_trained: 290000
    num_steps_sampled: 290000
    num_steps_trained: 290000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,290,4311.83,290000,-9.9256,-8.51,-10,992.56


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 291000
  custom_metrics: {}
  date: 2021-10-23_21-11-57
  done: false
  episode_len_mean: 992.56
  episode_media: {}
  episode_reward_max: -8.509999999999863
  episode_reward_mean: -9.925599999999836
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 369
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8441014299793386e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.06923480236695873
          entropy_coeff: 0.009999999999999998
          kl: 0.0036247210971648907
          policy_loss: -0.04428502056333754
          total_loss: -0.036001081930266486
          vf_explained_var: -0.6589173078536987
          vf_loss: 0.00897628707979392
    num_agent_steps_sampled: 291000
    num_agent_steps_trained: 291000
    num_steps_sampled: 291000
    num_steps_trained: 2910

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,291,4323.63,291000,-9.9256,-8.51,-10,992.56


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 292000
  custom_metrics: {}
  date: 2021-10-23_21-12-08
  done: false
  episode_len_mean: 994.05
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.940499999999837
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 370
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.220507149896693e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.0675961876908938
          entropy_coeff: 0.009999999999999998
          kl: 0.0030469120926378914
          policy_loss: -0.05891740173101425
          total_loss: -0.05067825284269121
          vf_explained_var: -0.502925455570221
          vf_loss: 0.008915115385186962
    num_agent_steps_sampled: 292000
    num_agent_steps_trained: 292000
    num_steps_sampled: 292000
    num_steps_trained: 292000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,292,4335.19,292000,-9.9405,-8.58,-10,994.05


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 293000
  custom_metrics: {}
  date: 2021-10-23_21-12-20
  done: false
  episode_len_mean: 994.05
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.940499999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 371
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.610253574948347e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.06938945800065995
          entropy_coeff: 0.009999999999999998
          kl: 0.004596293772596659
          policy_loss: -0.04703348129987717
          total_loss: -0.03894883294900258
          vf_explained_var: -0.6458309888839722
          vf_loss: 0.008778547460678965
    num_agent_steps_sampled: 293000
    num_agent_steps_trained: 293000
    num_steps_sampled: 293000
    num_steps_trained: 293000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,293,4346.58,293000,-9.9405,-8.58,-10,994.05


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 294000
  custom_metrics: {}
  date: 2021-10-23_21-12-31
  done: false
  episode_len_mean: 994.09
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.940899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 372
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3051267874741733e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.06989801228046418
          entropy_coeff: 0.009999999999999998
          kl: 0.0020798881350654282
          policy_loss: -0.05251737518443002
          total_loss: -0.044114714364210765
          vf_explained_var: -0.6360417604446411
          vf_loss: 0.009101639573539917
    num_agent_steps_sampled: 294000
    num_agent_steps_trained: 294000
    num_steps_sampled: 294000
    num_steps_trained: 294

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,294,4357.8,294000,-9.9409,-8.58,-10,994.09


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 295000
  custom_metrics: {}
  date: 2021-10-23_21-12-42
  done: false
  episode_len_mean: 994.05
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.940499999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 373
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1525633937370866e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.06999839051730103
          entropy_coeff: 0.009999999999999998
          kl: 0.0020408741175843817
          policy_loss: -0.04561730408006244
          total_loss: -0.03745967331859801
          vf_explained_var: -0.6479684710502625
          vf_loss: 0.008857612571187525
    num_agent_steps_sampled: 295000
    num_agent_steps_trained: 295000
    num_steps_sampled: 295000
    num_steps_trained: 2950

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,295,4369.06,295000,-9.9405,-8.58,-10,994.05


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 296000
  custom_metrics: {}
  date: 2021-10-23_21-12-54
  done: false
  episode_len_mean: 994.05
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.940499999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 374
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.762816968685433e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.06893406266139614
          entropy_coeff: 0.009999999999999998
          kl: 0.001826535326351101
          policy_loss: -0.06670586665471395
          total_loss: -0.058711310558848914
          vf_explained_var: -0.5836233496665955
          vf_loss: 0.008683899458911683
    num_agent_steps_sampled: 296000
    num_agent_steps_trained: 296000
    num_steps_sampled: 296000
    num_steps_trained: 29600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,296,4380.35,296000,-9.9405,-8.58,-10,994.05


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 297000
  custom_metrics: {}
  date: 2021-10-23_21-13-05
  done: false
  episode_len_mean: 994.08
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.940799999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 375
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8814084843427166e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.06549119800329209
          entropy_coeff: 0.009999999999999998
          kl: 0.0015912212127457475
          policy_loss: -0.05964384658469094
          total_loss: -0.05180307792292701
          vf_explained_var: -0.5789408087730408
          vf_loss: 0.008495683741057292
    num_agent_steps_sampled: 297000
    num_agent_steps_trained: 297000
    num_steps_sampled: 297000
    num_steps_trained: 2970

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,297,4391.79,297000,-9.9408,-8.58,-10,994.08


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 298000
  custom_metrics: {}
  date: 2021-10-23_21-13-15
  done: false
  episode_len_mean: 994.16
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.941599999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 376
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4407042421713583e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.06497292212314076
          entropy_coeff: 0.009999999999999998
          kl: 0.0025007027837596575
          policy_loss: -0.043800066577063666
          total_loss: -0.03599739174048106
          vf_explained_var: -0.36815303564071655
          vf_loss: 0.008452410860789112
    num_agent_steps_sampled: 298000
    num_agent_steps_trained: 298000
    num_steps_sampled: 298000
    num_steps_trained: 29

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,298,4401.76,298000,-9.9416,-8.58,-10,994.16


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 299000
  custom_metrics: {}
  date: 2021-10-23_21-13-26
  done: false
  episode_len_mean: 994.16
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.941599999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 377
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.203521210856792e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.06538899896873368
          entropy_coeff: 0.009999999999999998
          kl: 0.001458390342304483
          policy_loss: -0.060494014620780946
          total_loss: -0.05284548633628421
          vf_explained_var: -0.34138116240501404
          vf_loss: 0.00830241493272802
    num_agent_steps_sampled: 299000
    num_agent_steps_trained: 299000
    num_steps_sampled: 299000
    num_steps_trained: 29900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,299,4412.92,299000,-9.9416,-8.58,-10,994.16


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 300000
  custom_metrics: {}
  date: 2021-10-23_21-13-38
  done: false
  episode_len_mean: 994.08
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.940799999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 378
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.601760605428396e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.06842184662818909
          entropy_coeff: 0.009999999999999998
          kl: 0.0011754739230834983
          policy_loss: -0.06717779520485136
          total_loss: -0.05991249101029502
          vf_explained_var: -0.44623100757598877
          vf_loss: 0.007949519235666634
    num_agent_steps_sampled: 300000
    num_agent_steps_trained: 300000
    num_steps_sampled: 300000
    num_steps_trained: 3000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,300,4424.46,300000,-9.9408,-8.58,-10,994.08


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 301000
  custom_metrics: {}
  date: 2021-10-23_21-13-49
  done: false
  episode_len_mean: 994.12
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.941199999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 379
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.800880302714198e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.06550677994059192
          entropy_coeff: 0.009999999999999998
          kl: 0.0004114902489365906
          policy_loss: -0.05043795481324196
          total_loss: -0.042928661819961335
          vf_explained_var: -0.43538495898246765
          vf_loss: 0.008164358342118148
    num_agent_steps_sampled: 301000
    num_agent_steps_trained: 301000
    num_steps_sampled: 301000
    num_steps_trained: 301

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,301,4435.98,301000,-9.9412,-8.58,-10,994.12


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 302000
  custom_metrics: {}
  date: 2021-10-23_21-14-01
  done: false
  episode_len_mean: 994.12
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.941199999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 380
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.00440151357099e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.07038437384698126
          entropy_coeff: 0.009999999999999998
          kl: 0.0015076280526247704
          policy_loss: -0.05499566164281633
          total_loss: -0.04682826548814774
          vf_explained_var: -0.4491322934627533
          vf_loss: 0.008871239035054006
    num_agent_steps_sampled: 302000
    num_agent_steps_trained: 302000
    num_steps_sampled: 302000
    num_steps_trained: 302000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,302,4447.29,302000,-9.9412,-8.58,-10,994.12


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 303000
  custom_metrics: {}
  date: 2021-10-23_21-14-12
  done: false
  episode_len_mean: 994.12
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.941199999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 381
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.502200756785495e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.06578379761841562
          entropy_coeff: 0.009999999999999998
          kl: 0.0017491863164145294
          policy_loss: -0.05356865922609965
          total_loss: -0.045999394688341355
          vf_explained_var: -0.5877641439437866
          vf_loss: 0.008227104702058972
    num_agent_steps_sampled: 303000
    num_agent_steps_trained: 303000
    num_steps_sampled: 303000
    num_steps_trained: 3030

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,303,4458.69,303000,-9.9412,-8.58,-10,994.12


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 304000
  custom_metrics: {}
  date: 2021-10-23_21-14-23
  done: false
  episode_len_mean: 994.15
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.941499999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 382
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2511003783927473e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.06969676357176569
          entropy_coeff: 0.009999999999999998
          kl: 0.0005991900273430575
          policy_loss: -0.06316283154818747
          total_loss: -0.055785325831837124
          vf_explained_var: -0.6605555415153503
          vf_loss: 0.00807446899319378
    num_agent_steps_sampled: 304000
    num_agent_steps_trained: 304000
    num_steps_sampled: 304000
    num_steps_trained: 3040

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,304,4470.07,304000,-9.9415,-8.58,-10,994.15


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 305000
  custom_metrics: {}
  date: 2021-10-23_21-14-35
  done: false
  episode_len_mean: 994.23
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.942299999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 383
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1255501891963737e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.07089736072553529
          entropy_coeff: 0.009999999999999998
          kl: 0.0007175215836873071
          policy_loss: -0.05255333259701729
          total_loss: -0.045459675540526705
          vf_explained_var: -0.6371729373931885
          vf_loss: 0.0078026308636051705
    num_agent_steps_sampled: 305000
    num_agent_steps_trained: 305000
    num_steps_sampled: 305000
    num_steps_trained: 30

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,305,4481.5,305000,-9.9423,-8.58,-10,994.23


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 306000
  custom_metrics: {}
  date: 2021-10-23_21-14-47
  done: false
  episode_len_mean: 994.23
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.942299999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 384
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.627750945981868e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.07436546422541142
          entropy_coeff: 0.009999999999999998
          kl: 0.00102623735438101
          policy_loss: -0.050679576893647514
          total_loss: -0.044037428498268125
          vf_explained_var: -0.45156416296958923
          vf_loss: 0.007385800009877938
    num_agent_steps_sampled: 306000
    num_agent_steps_trained: 306000
    num_steps_sampled: 306000
    num_steps_trained: 3060

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,306,4493.13,306000,-9.9423,-8.58,-10,994.23


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 307000
  custom_metrics: {}
  date: 2021-10-23_21-14-58
  done: false
  episode_len_mean: 994.27
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.942699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 385
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.813875472990934e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.06598493427866035
          entropy_coeff: 0.009999999999999998
          kl: 0.0014369775463516513
          policy_loss: -0.048088353210025366
          total_loss: -0.04106715669234594
          vf_explained_var: -0.6408274173736572
          vf_loss: 0.007681045186473057
    num_agent_steps_sampled: 307000
    num_agent_steps_trained: 307000
    num_steps_sampled: 307000
    num_steps_trained: 3070

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,307,4504.61,307000,-9.9427,-8.58,-10,994.27


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 308000
  custom_metrics: {}
  date: 2021-10-23_21-15-10
  done: false
  episode_len_mean: 994.31
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943099999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 386
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.406937736495467e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.06648621873723136
          entropy_coeff: 0.009999999999999998
          kl: 0.002434408664703369
          policy_loss: -0.044599946008788215
          total_loss: -0.03779399163193173
          vf_explained_var: -0.6272915601730347
          vf_loss: 0.007470814048105644
    num_agent_steps_sampled: 308000
    num_agent_steps_trained: 308000
    num_steps_sampled: 308000
    num_steps_trained: 30800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,308,4516.02,308000,-9.9431,-8.58,-10,994.31


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 309000
  custom_metrics: {}
  date: 2021-10-23_21-15-21
  done: false
  episode_len_mean: 994.31
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943099999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 387
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.034688682477335e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.07118869626687632
          entropy_coeff: 0.009999999999999998
          kl: 0.0012086406291928141
          policy_loss: -0.048365037308798896
          total_loss: -0.041668746206495494
          vf_explained_var: -0.6503114104270935
          vf_loss: 0.007408176634150247
    num_agent_steps_sampled: 309000
    num_agent_steps_trained: 309000
    num_steps_sampled: 309000
    num_steps_trained: 309

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,309,4527.49,309000,-9.9431,-8.58,-10,994.31


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 310000
  custom_metrics: {}
  date: 2021-10-23_21-15-32
  done: false
  episode_len_mean: 994.31
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943099999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 388
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.517344341238668e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.07790271354218324
          entropy_coeff: 0.009999999999999998
          kl: 0.0008699067848889779
          policy_loss: -0.05049575277500683
          total_loss: -0.043967466470268037
          vf_explained_var: -0.3795507550239563
          vf_loss: 0.007307311080189215
    num_agent_steps_sampled: 310000
    num_agent_steps_trained: 310000
    num_steps_sampled: 310000
    num_steps_trained: 3100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,310,4538.87,310000,-9.9431,-8.58,-10,994.31


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 311000
  custom_metrics: {}
  date: 2021-10-23_21-15-44
  done: false
  episode_len_mean: 994.39
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 389
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.758672170619334e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.0785723589360714
          entropy_coeff: 0.009999999999999998
          kl: 0.00162036846870453
          policy_loss: -0.05064524544609918
          total_loss: -0.04448277238342497
          vf_explained_var: -0.4774731993675232
          vf_loss: 0.006948194629512727
    num_agent_steps_sampled: 311000
    num_agent_steps_trained: 311000
    num_steps_sampled: 311000
    num_steps_trained: 311000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,311,4550.39,311000,-9.9439,-8.58,-10,994.39




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 312000
  custom_metrics: {}
  date: 2021-10-23_21-16-12
  done: false
  episode_len_mean: 993.01
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.930099999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 390
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.79336085309667e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.08628580268058512
          entropy_coeff: 0.009999999999999998
          kl: 0.0052092044148594144
          policy_loss: -0.06558787276347479
          total_loss: -0.059093332787354784
          vf_explained_var: -0.6391447186470032
          vf_loss: 0.007357396986723567
    num_agent_steps_sampled: 312000
    num_agent_steps_trained: 312000
    num_steps_sampled: 312000
    num_steps_trained: 31200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,312,4578.49,312000,-9.9301,-8.58,-10,993.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 313000
  custom_metrics: {}
  date: 2021-10-23_21-16-25
  done: false
  episode_len_mean: 993.01
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.930099999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 391
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.79336085309667e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.07908202037215233
          entropy_coeff: 0.009999999999999998
          kl: 0.004180639872922458
          policy_loss: -0.06276398566034105
          total_loss: -0.05697193543116252
          vf_explained_var: -0.6123859882354736
          vf_loss: 0.006582871444212894
    num_agent_steps_sampled: 313000
    num_agent_steps_trained: 313000
    num_steps_sampled: 313000
    num_steps_trained: 313000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,313,4591.87,313000,-9.9301,-8.58,-10,993.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 314000
  custom_metrics: {}
  date: 2021-10-23_21-16-37
  done: false
  episode_len_mean: 993.01
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.930099999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 392
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.396680426548335e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.08891417520741622
          entropy_coeff: 0.009999999999999998
          kl: 0.005034469434111896
          policy_loss: -0.05181222822931078
          total_loss: -0.04590894977251689
          vf_explained_var: -0.562900960445404
          vf_loss: 0.006792417732584807
    num_agent_steps_sampled: 314000
    num_agent_steps_trained: 314000
    num_steps_sampled: 314000
    num_steps_trained: 314000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,314,4603.74,314000,-9.9301,-8.58,-10,993.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 315000
  custom_metrics: {}
  date: 2021-10-23_21-16-50
  done: false
  episode_len_mean: 993.01
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.930099999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 393
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.396680426548335e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.08311207931902674
          entropy_coeff: 0.009999999999999998
          kl: 0.007196103877827732
          policy_loss: -0.054504304048087863
          total_loss: -0.04867549290259679
          vf_explained_var: -0.4410112202167511
          vf_loss: 0.006659933008227704
    num_agent_steps_sampled: 315000
    num_agent_steps_trained: 315000
    num_steps_sampled: 315000
    num_steps_trained: 31500

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,315,4616.49,315000,-9.9301,-8.58,-10,993.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 316000
  custom_metrics: {}
  date: 2021-10-23_21-17-02
  done: false
  episode_len_mean: 993.01
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.930099999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 394
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.396680426548335e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.07533335660894712
          entropy_coeff: 0.009999999999999998
          kl: 0.0049111228507374306
          policy_loss: -0.04823766367303001
          total_loss: -0.04225484314892027
          vf_explained_var: -0.5211247205734253
          vf_loss: 0.0067361524347992
    num_agent_steps_sampled: 316000
    num_agent_steps_trained: 316000
    num_steps_sampled: 316000
    num_steps_trained: 316000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,316,4628.82,316000,-9.9301,-8.58,-10,993.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 317000
  custom_metrics: {}
  date: 2021-10-23_21-17-14
  done: false
  episode_len_mean: 992.97
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.929699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 395
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1983402132741673e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.07649682329760657
          entropy_coeff: 0.009999999999999998
          kl: 0.004987124490758611
          policy_loss: -0.029389851954248217
          total_loss: -0.023408423281378217
          vf_explained_var: -0.5265485048294067
          vf_loss: 0.006746395074555443
    num_agent_steps_sampled: 317000
    num_agent_steps_trained: 317000
    num_steps_sampled: 317000
    num_steps_trained: 317

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,317,4640.78,317000,-9.9297,-8.58,-10,992.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 318000
  custom_metrics: {}
  date: 2021-10-23_21-17-26
  done: false
  episode_len_mean: 992.97
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.929699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 396
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0991701066370837e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.07911059190001753
          entropy_coeff: 0.009999999999999998
          kl: 0.005257068433436669
          policy_loss: -0.044044906894365944
          total_loss: -0.03829006908668412
          vf_explained_var: -0.48472657799720764
          vf_loss: 0.006545943134632479
    num_agent_steps_sampled: 318000
    num_agent_steps_trained: 318000
    num_steps_sampled: 318000
    num_steps_trained: 318

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,318,4652.13,318000,-9.9297,-8.58,-10,992.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 319000
  custom_metrics: {}
  date: 2021-10-23_21-17-37
  done: false
  episode_len_mean: 992.97
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.929699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 397
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0991701066370837e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.08606314369373852
          entropy_coeff: 0.009999999999999998
          kl: 0.006671627365156181
          policy_loss: -0.033388413737217584
          total_loss: -0.02741252730290095
          vf_explained_var: -0.4678894579410553
          vf_loss: 0.006836518580611382
    num_agent_steps_sampled: 319000
    num_agent_steps_trained: 319000
    num_steps_sampled: 319000
    num_steps_trained: 3190

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,319,4662.78,319000,-9.9297,-8.58,-10,992.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 320000
  custom_metrics: {}
  date: 2021-10-23_21-17-47
  done: false
  episode_len_mean: 992.97
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.929699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 398
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0991701066370837e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.07819331859548886
          entropy_coeff: 0.009999999999999998
          kl: 0.003345466372815685
          policy_loss: -0.04864143679539363
          total_loss: -0.04300742977195316
          vf_explained_var: -0.5392016768455505
          vf_loss: 0.006415941431381119
    num_agent_steps_sampled: 320000
    num_agent_steps_trained: 320000
    num_steps_sampled: 320000
    num_steps_trained: 32000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,320,4673.02,320000,-9.9297,-8.58,-10,992.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 321000
  custom_metrics: {}
  date: 2021-10-23_21-17-58
  done: false
  episode_len_mean: 992.97
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.929699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 399
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.495850533185418e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.07824998514519797
          entropy_coeff: 0.009999999999999998
          kl: 0.006581892565009184
          policy_loss: -0.03979963519506984
          total_loss: -0.03430282688803143
          vf_explained_var: -0.19347412884235382
          vf_loss: 0.006279308542919655
    num_agent_steps_sampled: 321000
    num_agent_steps_trained: 321000
    num_steps_sampled: 321000
    num_steps_trained: 32100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,321,4684.55,321000,-9.9297,-8.58,-10,992.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 322000
  custom_metrics: {}
  date: 2021-10-23_21-18-10
  done: false
  episode_len_mean: 994.39
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 400
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.495850533185418e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.06584839742216798
          entropy_coeff: 0.009999999999999998
          kl: 0.0038744161946750763
          policy_loss: -0.029104055215915043
          total_loss: -0.02294823072022862
          vf_explained_var: -0.08692111074924469
          vf_loss: 0.006814309843078566
    num_agent_steps_sampled: 322000
    num_agent_steps_trained: 322000
    num_steps_sampled: 322000
    num_steps_trained: 322

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,322,4696.46,322000,-9.9439,-8.58,-10,994.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 323000
  custom_metrics: {}
  date: 2021-10-23_21-18-22
  done: false
  episode_len_mean: 994.39
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 401
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.747925266592709e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.08245070406960117
          entropy_coeff: 0.009999999999999998
          kl: 0.007115741033986624
          policy_loss: -0.016740260190433925
          total_loss: -0.010021407985024981
          vf_explained_var: -0.4588274657726288
          vf_loss: 0.007543357275426388
    num_agent_steps_sampled: 323000
    num_agent_steps_trained: 323000
    num_steps_sampled: 323000
    num_steps_trained: 3230

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,323,4708.48,323000,-9.9439,-8.58,-10,994.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 324000
  custom_metrics: {}
  date: 2021-10-23_21-18-34
  done: false
  episode_len_mean: 994.39
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 402
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.747925266592709e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.08247913933462567
          entropy_coeff: 0.009999999999999998
          kl: 0.006692985954901411
          policy_loss: -0.03104482102725241
          total_loss: -0.025300217585431203
          vf_explained_var: -0.1882454752922058
          vf_loss: 0.0065693935680125526
    num_agent_steps_sampled: 324000
    num_agent_steps_trained: 324000
    num_steps_sampled: 324000
    num_steps_trained: 3240

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,324,4720.3,324000,-9.9439,-8.58,-10,994.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 325000
  custom_metrics: {}
  date: 2021-10-23_21-18-46
  done: false
  episode_len_mean: 994.39
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 403
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.747925266592709e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.05365328772200478
          entropy_coeff: 0.009999999999999998
          kl: 0.004851915563793025
          policy_loss: -0.06063873055908415
          total_loss: -0.053788313931889
          vf_explained_var: -0.28766536712646484
          vf_loss: 0.007386949147459947
    num_agent_steps_sampled: 325000
    num_agent_steps_trained: 325000
    num_steps_sampled: 325000
    num_steps_trained: 325000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,325,4732.04,325000,-9.9439,-8.58,-10,994.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 326000
  custom_metrics: {}
  date: 2021-10-23_21-18-58
  done: false
  episode_len_mean: 994.35
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943499999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 404
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3739626332963546e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.06638136650953028
          entropy_coeff: 0.009999999999999998
          kl: 0.006664166739210486
          policy_loss: -0.039307812187406754
          total_loss: -0.03343215535084407
          vf_explained_var: -0.5344120860099792
          vf_loss: 0.006539468346939733
    num_agent_steps_sampled: 326000
    num_agent_steps_trained: 326000
    num_steps_sampled: 326000
    num_steps_trained: 3260

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,326,4743.79,326000,-9.9435,-8.58,-10,994.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 327000
  custom_metrics: {}
  date: 2021-10-23_21-19-08
  done: false
  episode_len_mean: 994.35
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943499999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 405
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3739626332963546e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.06574652414355013
          entropy_coeff: 0.009999999999999998
          kl: 0.004547764446922681
          policy_loss: -0.03573622703552246
          total_loss: -0.02991409848133723
          vf_explained_var: -0.44205623865127563
          vf_loss: 0.006479591431949909
    num_agent_steps_sampled: 327000
    num_agent_steps_trained: 327000
    num_steps_sampled: 327000
    num_steps_trained: 3270

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,327,4754.56,327000,-9.9435,-8.58,-10,994.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 328000
  custom_metrics: {}
  date: 2021-10-23_21-19-19
  done: false
  episode_len_mean: 994.35
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943499999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 406
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.869813166481773e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.13694931185907788
          entropy_coeff: 0.009999999999999998
          kl: 0.019405210341533852
          policy_loss: -0.03627724928988351
          total_loss: -0.031257091131475234
          vf_explained_var: -0.4916510581970215
          vf_loss: 0.006389652292192396
    num_agent_steps_sampled: 328000
    num_agent_steps_trained: 328000
    num_steps_sampled: 328000
    num_steps_trained: 32800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,328,4765.23,328000,-9.9435,-8.58,-10,994.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 329000
  custom_metrics: {}
  date: 2021-10-23_21-19-30
  done: false
  episode_len_mean: 994.35
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943499999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 407
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.869813166481773e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.06142089486949974
          entropy_coeff: 0.009999999999999998
          kl: 0.005259277001540694
          policy_loss: -0.07144533751739396
          total_loss: -0.06319977086451319
          vf_explained_var: -0.1633194535970688
          vf_loss: 0.008859779167040768
    num_agent_steps_sampled: 329000
    num_agent_steps_trained: 329000
    num_steps_sampled: 329000
    num_steps_trained: 329000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,329,4775.88,329000,-9.9435,-8.58,-10,994.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 330000
  custom_metrics: {}
  date: 2021-10-23_21-19-42
  done: false
  episode_len_mean: 994.35
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943499999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 408
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.869813166481773e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.06783366170194415
          entropy_coeff: 0.009999999999999998
          kl: 0.005606259293724886
          policy_loss: -0.022833262218369378
          total_loss: -0.016479708088768853
          vf_explained_var: 0.02277630753815174
          vf_loss: 0.0070318884785390565
    num_agent_steps_sampled: 330000
    num_agent_steps_trained: 330000
    num_steps_sampled: 330000
    num_steps_trained: 330

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,330,4787.58,330000,-9.9435,-8.58,-10,994.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 331000
  custom_metrics: {}
  date: 2021-10-23_21-19-54
  done: false
  episode_len_mean: 994.31
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943099999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 409
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.869813166481773e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.07262194852034251
          entropy_coeff: 0.009999999999999998
          kl: 0.003293736228564133
          policy_loss: -0.03121875507964028
          total_loss: -0.025776098171869915
          vf_explained_var: -0.2514213025569916
          vf_loss: 0.006168879141558945
    num_agent_steps_sampled: 331000
    num_agent_steps_trained: 331000
    num_steps_sampled: 331000
    num_steps_trained: 33100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,331,4799.65,331000,-9.9431,-8.58,-10,994.31


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 332000
  custom_metrics: {}
  date: 2021-10-23_21-20-05
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.944299999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 410
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4349065832408865e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.08753479603264067
          entropy_coeff: 0.009999999999999998
          kl: 0.006338347005657852
          policy_loss: -0.045564031932089064
          total_loss: -0.04003116190433502
          vf_explained_var: -0.5875749588012695
          vf_loss: 0.006408217901157008
    num_agent_steps_sampled: 332000
    num_agent_steps_trained: 332000
    num_steps_sampled: 332000
    num_steps_trained: 3320

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,332,4810.98,332000,-9.9443,-8.58,-10,994.43


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 333000
  custom_metrics: {}
  date: 2021-10-23_21-20-17
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.944699999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 411
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4349065832408865e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.10905260004931026
          entropy_coeff: 0.009999999999999998
          kl: 0.00741380023666554
          policy_loss: -0.05061701734860738
          total_loss: -0.04563331769572364
          vf_explained_var: -0.6036766767501831
          vf_loss: 0.006074221319027452
    num_agent_steps_sampled: 333000
    num_agent_steps_trained: 333000
    num_steps_sampled: 333000
    num_steps_trained: 333000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,333,4822.61,333000,-9.9447,-8.58,-10,994.47


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 334000
  custom_metrics: {}
  date: 2021-10-23_21-20-28
  done: false
  episode_len_mean: 994.47
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.944699999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 412
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4349065832408865e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.20099519747826788
          entropy_coeff: 0.009999999999999998
          kl: 0.02780507040572249
          policy_loss: -0.05826468285587099
          total_loss: -0.05284960029853715
          vf_explained_var: -0.5365952849388123
          vf_loss: 0.007425035822800257
    num_agent_steps_sampled: 334000
    num_agent_steps_trained: 334000
    num_steps_sampled: 334000
    num_steps_trained: 334000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,334,4833.56,334000,-9.9447,-8.58,-10,994.47


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 335000
  custom_metrics: {}
  date: 2021-10-23_21-20-39
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.944299999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 413
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.152359874861331e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.27147038214736513
          entropy_coeff: 0.009999999999999998
          kl: 0.05948391063138843
          policy_loss: 0.029213835795720417
          total_loss: 0.033036045812898214
          vf_explained_var: -0.16279920935630798
          vf_loss: 0.006536910121536089
    num_agent_steps_sampled: 335000
    num_agent_steps_trained: 335000
    num_steps_sampled: 335000
    num_steps_trained: 335000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,335,4844.53,335000,-9.9443,-8.58,-10,994.43


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 336000
  custom_metrics: {}
  date: 2021-10-23_21-20-51
  done: false
  episode_len_mean: 994.43
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.944299999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 414
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.728539812291995e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.2927721952398618
          entropy_coeff: 0.009999999999999998
          kl: 0.05006355266604159
          policy_loss: -0.034997204318642616
          total_loss: -0.03121947165992525
          vf_explained_var: -0.34115540981292725
          vf_loss: 0.006705455154749669
    num_agent_steps_sampled: 336000
    num_agent_steps_trained: 336000
    num_steps_sampled: 336000
    num_steps_trained: 336000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,336,4856.4,336000,-9.9443,-8.58,-10,994.43


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 337000
  custom_metrics: {}
  date: 2021-10-23_21-21-03
  done: false
  episode_len_mean: 994.38
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943799999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 415
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1592809718437993e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.17959604271584087
          entropy_coeff: 0.009999999999999998
          kl: 0.016924225520859988
          policy_loss: -0.012337564511431589
          total_loss: -0.0067762894762886895
          vf_explained_var: -0.6534672379493713
          vf_loss: 0.0073572347762011405
    num_agent_steps_sampled: 337000
    num_agent_steps_trained: 337000
    num_steps_sampled: 337000
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,337,4868.74,337000,-9.9438,-8.58,-10,994.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 338000
  custom_metrics: {}
  date: 2021-10-23_21-21-15
  done: false
  episode_len_mean: 994.38
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.943799999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 416
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1592809718437993e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.22188078223003282
          entropy_coeff: 0.009999999999999998
          kl: 0.022403964534815815
          policy_loss: -0.04807811660899056
          total_loss: -0.04457115630308787
          vf_explained_var: -0.5570550560951233
          vf_loss: 0.005725772123939047
    num_agent_steps_sampled: 338000
    num_agent_steps_trained: 338000
    num_steps_sampled: 338000
    num_steps_trained: 33800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,338,4881.16,338000,-9.9438,-8.58,-10,994.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 339000
  custom_metrics: {}
  date: 2021-10-23_21-21-27
  done: false
  episode_len_mean: 994.42
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.944199999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 417
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7389214577656988e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.27214097364081274
          entropy_coeff: 0.009999999999999998
          kl: 0.01271139115302099
          policy_loss: -0.052691247686743735
          total_loss: -0.049309115608533224
          vf_explained_var: -0.5319527983665466
          vf_loss: 0.00610353987560504
    num_agent_steps_sampled: 339000
    num_agent_steps_trained: 339000
    num_steps_sampled: 339000
    num_steps_trained: 33900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,339,4892.77,339000,-9.9442,-8.58,-10,994.42


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 340000
  custom_metrics: {}
  date: 2021-10-23_21-21-38
  done: false
  episode_len_mean: 994.18
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.941799999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 418
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7389214577656988e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.38007265163792503
          entropy_coeff: 0.009999999999999998
          kl: 0.038639752101153134
          policy_loss: -0.031247735189066993
          total_loss: -0.02754095858997769
          vf_explained_var: -0.4350646436214447
          vf_loss: 0.0075075006215936605
    num_agent_steps_sampled: 340000
    num_agent_steps_trained: 340000
    num_steps_sampled: 340000
    num_steps_trained: 340

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,340,4903.54,340000,-9.9418,-8.58,-10,994.18


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 341000
  custom_metrics: {}
  date: 2021-10-23_21-21-50
  done: false
  episode_len_mean: 994.14
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.941399999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 419
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.608382186648547e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.17184480155507723
          entropy_coeff: 0.009999999999999998
          kl: 0.016762356592031815
          policy_loss: -0.015456706202692455
          total_loss: -0.011418992198175854
          vf_explained_var: -0.07121247053146362
          vf_loss: 0.005756165549650582
    num_agent_steps_sampled: 341000
    num_agent_steps_trained: 341000
    num_steps_sampled: 341000
    num_steps_trained: 341

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,341,4915.6,341000,-9.9414,-8.58,-10,994.14




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 342000
  custom_metrics: {}
  date: 2021-10-23_21-22-19
  done: false
  episode_len_mean: 992.77
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.927699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 420
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.608382186648547e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.15419350183672376
          entropy_coeff: 0.009999999999999998
          kl: 0.033876574908693634
          policy_loss: -0.06278328701025909
          total_loss: -0.054241033602092
          vf_explained_var: -0.02148585394024849
          vf_loss: 0.010084191019025942
    num_agent_steps_sampled: 342000
    num_agent_steps_trained: 342000
    num_steps_sampled: 342000
    num_steps_trained: 342000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,342,4944.89,342000,-9.9277,-8.58,-10,992.77


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 343000
  custom_metrics: {}
  date: 2021-10-23_21-22-31
  done: false
  episode_len_mean: 992.77
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.927699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 421
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.912573279972822e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.1782377239730623
          entropy_coeff: 0.009999999999999998
          kl: 0.014285568217746913
          policy_loss: -0.062443526337544124
          total_loss: -0.0574149908290969
          vf_explained_var: -0.5635004639625549
          vf_loss: 0.006810911641999458
    num_agent_steps_sampled: 343000
    num_agent_steps_trained: 343000
    num_steps_sampled: 343000
    num_steps_trained: 343000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,343,4956.86,343000,-9.9277,-8.58,-10,992.77


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 344000
  custom_metrics: {}
  date: 2021-10-23_21-22-42
  done: false
  episode_len_mean: 992.81
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.928099999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 422
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.912573279972822e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.11794100768036313
          entropy_coeff: 0.009999999999999998
          kl: 0.014168603059887472
          policy_loss: -0.07061186366611057
          total_loss: -0.06555675831105974
          vf_explained_var: 0.22428366541862488
          vf_loss: 0.006234511072074788
    num_agent_steps_sampled: 344000
    num_agent_steps_trained: 344000
    num_steps_sampled: 344000
    num_steps_trained: 344000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,344,4967.52,344000,-9.9281,-8.58,-10,992.81


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 345000
  custom_metrics: {}
  date: 2021-10-23_21-22-53
  done: false
  episode_len_mean: 992.77
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.927699999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 423
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.912573279972822e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.19530979146560032
          entropy_coeff: 0.009999999999999998
          kl: 0.03372798328184419
          policy_loss: -0.04803044696648916
          total_loss: -0.04247820410463545
          vf_explained_var: -0.5887956619262695
          vf_loss: 0.007505342511123874
    num_agent_steps_sampled: 345000
    num_agent_steps_trained: 345000
    num_steps_sampled: 345000
    num_steps_trained: 345000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,345,4978.27,345000,-9.9277,-8.58,-10,992.77


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 346000
  custom_metrics: {}
  date: 2021-10-23_21-23-04
  done: false
  episode_len_mean: 992.69
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.926899999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 424
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.868859919959231e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.20470003220770094
          entropy_coeff: 0.009999999999999998
          kl: 0.027795150361230803
          policy_loss: -0.03530088480975893
          total_loss: -0.0309885925716824
          vf_explained_var: -0.6099328994750977
          vf_loss: 0.006359288670743505
    num_agent_steps_sampled: 346000
    num_agent_steps_trained: 346000
    num_steps_sampled: 346000
    num_steps_trained: 346000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,346,4989.38,346000,-9.9269,-8.58,-10,992.69


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 347000
  custom_metrics: {}
  date: 2021-10-23_21-23-16
  done: false
  episode_len_mean: 992.33
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.923299999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 425
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.803289879938851e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.210352813369698
          entropy_coeff: 0.009999999999999998
          kl: 0.030520106189780766
          policy_loss: -0.049870594176981184
          total_loss: -0.04467934949530496
          vf_explained_var: -0.6028175950050354
          vf_loss: 0.0072947772840658825
    num_agent_steps_sampled: 347000
    num_agent_steps_trained: 347000
    num_steps_sampled: 347000
    num_steps_trained: 347000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,347,5001.47,347000,-9.9233,-8.58,-10,992.33


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 348000
  custom_metrics: {}
  date: 2021-10-23_21-23-28
  done: false
  episode_len_mean: 992.29
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.922899999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 426
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3204934819908276e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.28981412798166273
          entropy_coeff: 0.009999999999999998
          kl: 0.20773035493782824
          policy_loss: -0.02293130676779482
          total_loss: -0.015933600275052917
          vf_explained_var: -0.25372040271759033
          vf_loss: 0.009895848790701064
    num_agent_steps_sampled: 348000
    num_agent_steps_trained: 348000
    num_steps_sampled: 348000
    num_steps_trained: 3480

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,348,5013.78,348000,-9.9229,-8.58,-10,992.29


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 349000
  custom_metrics: {}
  date: 2021-10-23_21-23-40
  done: false
  episode_len_mean: 992.09
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.920899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 427
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9807402229862413e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.1490552647246255
          entropy_coeff: 0.009999999999999998
          kl: 0.01148468438639409
          policy_loss: -0.042305807189808954
          total_loss: -0.03865392671691047
          vf_explained_var: -0.5159086585044861
          vf_loss: 0.005142432024391989
    num_agent_steps_sampled: 349000
    num_agent_steps_trained: 349000
    num_steps_sampled: 349000
    num_steps_trained: 349000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,349,5025.46,349000,-9.9209,-8.58,-10,992.09


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 350000
  custom_metrics: {}
  date: 2021-10-23_21-23-51
  done: false
  episode_len_mean: 992.05
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.920499999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 428
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9807402229862413e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.12236115179128117
          entropy_coeff: 0.009999999999999998
          kl: 0.0062616225125061145
          policy_loss: -0.022929469992717107
          total_loss: -0.01824628793530994
          vf_explained_var: -0.537717342376709
          vf_loss: 0.0059067903882047785
    num_agent_steps_sampled: 350000
    num_agent_steps_trained: 350000
    num_steps_sampled: 350000
    num_steps_trained: 350

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,350,5036.63,350000,-9.9205,-8.58,-10,992.05


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 351000
  custom_metrics: {}
  date: 2021-10-23_21-24-03
  done: false
  episode_len_mean: 992.01
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.920099999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 429
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9807402229862413e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.12331798697511355
          entropy_coeff: 0.009999999999999998
          kl: 0.01643339770897809
          policy_loss: -0.03392902811368306
          total_loss: -0.030086404002375073
          vf_explained_var: -0.48941558599472046
          vf_loss: 0.005075801581713475
    num_agent_steps_sampled: 351000
    num_agent_steps_trained: 351000
    num_steps_sampled: 351000
    num_steps_trained: 3510

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,351,5048.36,351000,-9.9201,-8.58,-10,992.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 352000
  custom_metrics: {}
  date: 2021-10-23_21-24-13
  done: false
  episode_len_mean: 993.26
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.932599999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 430
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9807402229862413e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.40790512926048705
          entropy_coeff: 0.009999999999999998
          kl: 0.10250167105760839
          policy_loss: -0.043862129085593754
          total_loss: -0.0432386201288965
          vf_explained_var: -0.42669805884361267
          vf_loss: 0.004702556390677475
    num_agent_steps_sampled: 352000
    num_agent_steps_trained: 352000
    num_steps_sampled: 352000
    num_steps_trained: 35200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,352,5058.81,352000,-9.9326,-8.58,-10,993.26


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 353000
  custom_metrics: {}
  date: 2021-10-23_21-24-24
  done: false
  episode_len_mean: 993.3
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.932999999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 431
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9711103344793613e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.19835185292694305
          entropy_coeff: 0.009999999999999998
          kl: 0.07868230800247855
          policy_loss: -0.07450566130379836
          total_loss: -0.06991753818260299
          vf_explained_var: -0.261719673871994
          vf_loss: 0.006571641445366872
    num_agent_steps_sampled: 353000
    num_agent_steps_trained: 353000
    num_steps_sampled: 353000
    num_steps_trained: 353000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,353,5069.98,353000,-9.933,-8.58,-10,993.3


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 354000
  custom_metrics: {}
  date: 2021-10-23_21-24-35
  done: false
  episode_len_mean: 993.02
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.930199999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 433
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.456665501719043e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.3149950666560067
          entropy_coeff: 0.009999999999999998
          kl: 0.04323928492764632
          policy_loss: 0.10057819899585511
          total_loss: 0.10087434699138005
          vf_explained_var: -0.3966284394264221
          vf_loss: 0.0034460988943465056
    num_agent_steps_sampled: 354000
    num_agent_steps_trained: 354000
    num_steps_sampled: 354000
    num_steps_trained: 354000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,354,5080.89,354000,-9.9302,-8.58,-10,993.02


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 355000
  custom_metrics: {}
  date: 2021-10-23_21-24-46
  done: false
  episode_len_mean: 992.87
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.928699999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 434
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.684998252578565e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.24745337383614646
          entropy_coeff: 0.009999999999999998
          kl: 0.040796214202418926
          policy_loss: 0.11657252932588259
          total_loss: 0.11579702479971779
          vf_explained_var: -0.6778632998466492
          vf_loss: 0.001699031069680738
    num_agent_steps_sampled: 355000
    num_agent_steps_trained: 355000
    num_steps_sampled: 355000
    num_steps_trained: 355000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,355,5091.38,355000,-9.9287,-8.58,-10,992.87


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 356000
  custom_metrics: {}
  date: 2021-10-23_21-24-57
  done: false
  episode_len_mean: 992.52
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.925199999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 435
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0027497378867846e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.3936055236392551
          entropy_coeff: 0.009999999999999998
          kl: 0.13307594512071874
          policy_loss: 0.11547597780202826
          total_loss: 0.11388701157023509
          vf_explained_var: -0.3723178803920746
          vf_loss: 0.0023470896276800582
    num_agent_steps_sampled: 356000
    num_agent_steps_trained: 356000
    num_steps_sampled: 356000
    num_steps_trained: 356000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,356,5102.91,356000,-9.9252,-8.58,-10,992.52


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 357000
  custom_metrics: {}
  date: 2021-10-23_21-25-10
  done: false
  episode_len_mean: 992.35
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.923499999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 436
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5041246068301768e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.29136397060420777
          entropy_coeff: 0.009999999999999998
          kl: 0.05169634247819583
          policy_loss: 0.1295211044450601
          total_loss: 0.1279777268982596
          vf_explained_var: -0.14523965120315552
          vf_loss: 0.001370259922178876
    num_agent_steps_sampled: 357000
    num_agent_steps_trained: 357000
    num_steps_sampled: 357000
    num_steps_trained: 357000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,357,5115.12,357000,-9.9235,-8.58,-10,992.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 358000
  custom_metrics: {}
  date: 2021-10-23_21-25-22
  done: false
  episode_len_mean: 992.2
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.921999999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 437
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2561869102452655e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.3031186994579103
          entropy_coeff: 0.009999999999999998
          kl: 0.0519412798807025
          policy_loss: 0.1142962800959746
          total_loss: 0.11267661973834038
          vf_explained_var: -0.5636222958564758
          vf_loss: 0.001411527560816871
    num_agent_steps_sampled: 358000
    num_agent_steps_trained: 358000
    num_steps_sampled: 358000
    num_steps_trained: 358000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,358,5127.16,358000,-9.922,-8.58,-10,992.2


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 359000
  custom_metrics: {}
  date: 2021-10-23_21-25-33
  done: false
  episode_len_mean: 991.97
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.919699999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 438
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.384280365367898e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.28072571290863885
          entropy_coeff: 0.009999999999999998
          kl: 0.07000775792532497
          policy_loss: 0.17944478098717
          total_loss: 0.17744939323100778
          vf_explained_var: -0.4230192005634308
          vf_loss: 0.0008118701164170893
    num_agent_steps_sampled: 359000
    num_agent_steps_trained: 359000
    num_steps_sampled: 359000
    num_steps_trained: 359000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,359,5138,359000,-9.9197,-8.58,-10,991.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 360000
  custom_metrics: {}
  date: 2021-10-23_21-25-43
  done: false
  episode_len_mean: 991.64
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.916399999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 439
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.076420548051848e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.455868994196256
          entropy_coeff: 0.009999999999999998
          kl: 0.16301919495066006
          policy_loss: 0.16927586396535238
          total_loss: 0.1661683976650238
          vf_explained_var: -0.8065885901451111
          vf_loss: 0.0014512216743443988
    num_agent_steps_sampled: 360000
    num_agent_steps_trained: 360000
    num_steps_sampled: 360000
    num_steps_trained: 360000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,360,5148.45,360000,-9.9164,-8.58,-10,991.64


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 361000
  custom_metrics: {}
  date: 2021-10-23_21-25-54
  done: false
  episode_len_mean: 991.42
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.914199999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 440
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.614630822077771e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.5444592790471183
          entropy_coeff: 0.009999999999999998
          kl: 0.3796251114043925
          policy_loss: 0.12942661792039872
          total_loss: 0.13156955680913396
          vf_explained_var: -0.5482552647590637
          vf_loss: 0.007587532409363323
    num_agent_steps_sampled: 361000
    num_agent_steps_trained: 361000
    num_steps_sampled: 361000
    num_steps_trained: 361000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,361,5159.14,361000,-9.9142,-8.58,-10,991.42


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 362000
  custom_metrics: {}
  date: 2021-10-23_21-26-05
  done: false
  episode_len_mean: 991.38
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.913799999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 441
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1421946233116656e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.3713941752910614
          entropy_coeff: 0.009999999999999998
          kl: 0.34365870207548144
          policy_loss: 0.11716855789224306
          total_loss: 0.11510018416576916
          vf_explained_var: -1.0
          vf_loss: 0.0016455672028112328
    num_agent_steps_sampled: 362000
    num_agent_steps_trained: 362000
    num_steps_sampled: 362000
    num_steps_trained: 362000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,362,5170.73,362000,-9.9138,-8.58,-10,991.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 363000
  custom_metrics: {}
  date: 2021-10-23_21-26-16
  done: false
  episode_len_mean: 991.3
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.912999999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 442
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.713291934967498e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.47766493724452125
          entropy_coeff: 0.009999999999999998
          kl: 0.90708037548595
          policy_loss: -0.008930720864898628
          total_loss: 0.001102836812949843
          vf_explained_var: -0.9995505213737488
          vf_loss: 0.014810208491204927
    num_agent_steps_sampled: 363000
    num_agent_steps_trained: 363000
    num_steps_sampled: 363000
    num_steps_trained: 363000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,363,5181.17,363000,-9.913,-8.58,-10,991.3


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 364000
  custom_metrics: {}
  date: 2021-10-23_21-26-26
  done: false
  episode_len_mean: 991.26
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.912599999999832
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 443
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5699379024512477e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.5183383646938536
          entropy_coeff: 0.009999999999999998
          kl: 0.24883003847466575
          policy_loss: 0.07843572898871369
          total_loss: 0.07909903596672747
          vf_explained_var: -1.0
          vf_loss: 0.005846690980251879
    num_agent_steps_sampled: 364000
    num_agent_steps_trained: 364000
    num_steps_sampled: 364000
    num_steps_trained: 364000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,364,5191.72,364000,-9.9126,-8.58,-10,991.26


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 365000
  custom_metrics: {}
  date: 2021-10-23_21-26-38
  done: false
  episode_len_mean: 990.46
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.904599999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 444
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.854906853676872e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.287508068813218
          entropy_coeff: 0.009999999999999998
          kl: 0.1556658082538181
          policy_loss: -0.05487518743094471
          total_loss: -0.03873928685983022
          vf_explained_var: -0.6336991786956787
          vf_loss: 0.019010981989817488
    num_agent_steps_sampled: 365000
    num_agent_steps_trained: 365000
    num_steps_sampled: 365000
    num_steps_trained: 365000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,365,5203.05,365000,-9.9046,-8.58,-10,990.46


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 366000
  custom_metrics: {}
  date: 2021-10-23_21-26-49
  done: false
  episode_len_mean: 990.46
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.904599999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 445
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.78236028051531e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.5029326962100135
          entropy_coeff: 0.009999999999999998
          kl: 0.24674475838740667
          policy_loss: -0.07671674970123503
          total_loss: -0.060592293159829246
          vf_explained_var: -0.646818220615387
          vf_loss: 0.021153782416755956
    num_agent_steps_sampled: 366000
    num_agent_steps_trained: 366000
    num_steps_sampled: 366000
    num_steps_trained: 366000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,366,5213.91,366000,-9.9046,-8.58,-10,990.46


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 367000
  custom_metrics: {}
  date: 2021-10-23_21-26-59
  done: false
  episode_len_mean: 989.99
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.899899999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 446
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.673540420772961e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.5090891877810161
          entropy_coeff: 0.009999999999999998
          kl: 0.29056580571664703
          policy_loss: -0.02966183837917116
          total_loss: -0.01628146784173118
          vf_explained_var: -0.2770635783672333
          vf_loss: 0.018471264771263426
    num_agent_steps_sampled: 367000
    num_agent_steps_trained: 367000
    num_steps_sampled: 367000
    num_steps_trained: 367000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,367,5224.67,367000,-9.8999,-8.58,-10,989.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 368000
  custom_metrics: {}
  date: 2021-10-23_21-27-13
  done: false
  episode_len_mean: 989.83
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.898299999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 447
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3010310631159442e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.6338705685403612
          entropy_coeff: 0.009999999999999998
          kl: 0.3365293052461412
          policy_loss: -0.05867120805713866
          total_loss: -0.04744947204987208
          vf_explained_var: -0.6941773295402527
          vf_loss: 0.01756044472179686
    num_agent_steps_sampled: 368000
    num_agent_steps_trained: 368000
    num_steps_sampled: 368000
    num_steps_trained: 368000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,368,5237.92,368000,-9.8983,-8.58,-10,989.83


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 369000
  custom_metrics: {}
  date: 2021-10-23_21-27-26
  done: false
  episode_len_mean: 989.44
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.894399999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 448
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9515465946739162e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.21267654498418173
          entropy_coeff: 0.009999999999999998
          kl: 0.159017783900102
          policy_loss: -0.006454496830701828
          total_loss: 0.003497029178672367
          vf_explained_var: -0.40970247983932495
          vf_loss: 0.012078289637186875
    num_agent_steps_sampled: 369000
    num_agent_steps_trained: 369000
    num_steps_sampled: 369000
    num_steps_trained: 369000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,369,5250.9,369000,-9.8944,-8.58,-10,989.44


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 370000
  custom_metrics: {}
  date: 2021-10-23_21-27-36
  done: false
  episode_len_mean: 989.04
  episode_media: {}
  episode_reward_max: -8.579999999999862
  episode_reward_mean: -9.890399999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 449
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9273198920108747e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.3899271769656075
          entropy_coeff: 0.009999999999999998
          kl: 0.18412326549490293
          policy_loss: 0.08169152852561738
          total_loss: 0.08690480540196101
          vf_explained_var: -0.4380030035972595
          vf_loss: 0.00911255068042212
    num_agent_steps_sampled: 370000
    num_agent_steps_trained: 370000
    num_steps_sampled: 370000
    num_steps_trained: 370000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,370,5261.63,370000,-9.8904,-8.58,-10,989.04




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 371000
  custom_metrics: {}
  date: 2021-10-23_21-28-04
  done: false
  episode_len_mean: 987.46
  episode_media: {}
  episode_reward_max: -8.379999999999866
  episode_reward_mean: -9.874599999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 450
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.390979838016312e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.4128801660405265
          entropy_coeff: 0.009999999999999998
          kl: 0.15739588108327654
          policy_loss: -0.04960265631477038
          total_loss: -0.04428753832148181
          vf_explained_var: -0.20774689316749573
          vf_loss: 0.009443919356110402
    num_agent_steps_sampled: 371000
    num_agent_steps_trained: 371000
    num_steps_sampled: 371000
    num_steps_trained: 371000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,371,5289.61,371000,-9.8746,-8.38,-10,987.46


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 372000
  custom_metrics: {}
  date: 2021-10-23_21-28-18
  done: false
  episode_len_mean: 987.34
  episode_media: {}
  episode_reward_max: -8.379999999999866
  episode_reward_mean: -9.873399999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 451
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.586469757024469e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.3781992478503121
          entropy_coeff: 0.009999999999999998
          kl: 0.4954432601730029
          policy_loss: -0.1343584375249015
          total_loss: -0.13414316657516692
          vf_explained_var: -0.7019317150115967
          vf_loss: 0.003997268316905118
    num_agent_steps_sampled: 372000
    num_agent_steps_trained: 372000
    num_steps_sampled: 372000
    num_steps_trained: 372000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,372,5303.31,372000,-9.8734,-8.38,-10,987.34


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 373000
  custom_metrics: {}
  date: 2021-10-23_21-28-31
  done: false
  episode_len_mean: 987.18
  episode_media: {}
  episode_reward_max: -8.379999999999866
  episode_reward_mean: -9.871799999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 452
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.8797046355367e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.34469553960694205
          entropy_coeff: 0.009999999999999998
          kl: 0.5450506445434359
          policy_loss: -0.09646599408653048
          total_loss: -0.09205915588471625
          vf_explained_var: -0.5902977585792542
          vf_loss: 0.007853796688788052
    num_agent_steps_sampled: 373000
    num_agent_steps_trained: 373000
    num_steps_sampled: 373000
    num_steps_trained: 373000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,373,5315.8,373000,-9.8718,-8.38,-10,987.18


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 374000
  custom_metrics: {}
  date: 2021-10-23_21-28-42
  done: false
  episode_len_mean: 987.1
  episode_media: {}
  episode_reward_max: -8.379999999999866
  episode_reward_mean: -9.870999999999833
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 453
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4819556953305054e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.258131029870775
          entropy_coeff: 0.009999999999999998
          kl: 0.28609713957541516
          policy_loss: -0.03360857822828823
          total_loss: -0.030961064083708658
          vf_explained_var: -0.4567934274673462
          vf_loss: 0.005228825342944927
    num_agent_steps_sampled: 374000
    num_agent_steps_trained: 374000
    num_steps_sampled: 374000
    num_steps_trained: 374000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,374,5327.1,374000,-9.871,-8.38,-10,987.1


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 375000
  custom_metrics: {}
  date: 2021-10-23_21-28-52
  done: false
  episode_len_mean: 986.98
  episode_media: {}
  episode_reward_max: -8.379999999999866
  episode_reward_mean: -9.869799999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 454
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.222933542995758e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.3640883480509122
          entropy_coeff: 0.009999999999999998
          kl: 0.3244005113426182
          policy_loss: -0.060301270253128474
          total_loss: -0.05457163370317883
          vf_explained_var: -0.6544412970542908
          vf_loss: 0.00937052121023751
    num_agent_steps_sampled: 375000
    num_agent_steps_trained: 375000
    num_steps_sampled: 375000
    num_steps_trained: 375000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,375,5337.19,375000,-9.8698,-8.38,-10,986.98


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 376000
  custom_metrics: {}
  date: 2021-10-23_21-29-03
  done: false
  episode_len_mean: 986.94
  episode_media: {}
  episode_reward_max: -8.379999999999866
  episode_reward_mean: -9.869399999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 455
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3344003144936374e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.3041247742043601
          entropy_coeff: 0.009999999999999998
          kl: 0.5587057146761153
          policy_loss: -0.06797115380565326
          total_loss: -0.062183305124441784
          vf_explained_var: -0.623732328414917
          vf_loss: 0.008829095144109386
    num_agent_steps_sampled: 376000
    num_agent_steps_trained: 376000
    num_steps_sampled: 376000
    num_steps_trained: 376000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,376,5347.81,376000,-9.8694,-8.38,-10,986.94


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 377000
  custom_metrics: {}
  date: 2021-10-23_21-29-13
  done: false
  episode_len_mean: 986.9
  episode_media: {}
  episode_reward_max: -8.379999999999866
  episode_reward_mean: -9.868999999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 456
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.001600471740456e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.44975547889868417
          entropy_coeff: 0.009999999999999998
          kl: 0.6798782282405429
          policy_loss: -0.03140050835079617
          total_loss: -0.030023052295049032
          vf_explained_var: -0.4666769206523895
          vf_loss: 0.0058750070460114835
    num_agent_steps_sampled: 377000
    num_agent_steps_trained: 377000
    num_steps_sampled: 377000
    num_steps_trained: 377000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,377,5358.14,377000,-9.869,-8.38,-10,986.9


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 378000
  custom_metrics: {}
  date: 2021-10-23_21-29-24
  done: false
  episode_len_mean: 986.74
  episode_media: {}
  episode_reward_max: -8.379999999999866
  episode_reward_mean: -9.867399999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 457
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.502400707610684e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.5839023898045222
          entropy_coeff: 0.009999999999999998
          kl: 0.2892281558778551
          policy_loss: -0.03620118689205912
          total_loss: -0.03777014546924167
          vf_explained_var: -0.4164268374443054
          vf_loss: 0.004270059551991936
    num_agent_steps_sampled: 378000
    num_agent_steps_trained: 378000
    num_steps_sampled: 378000
    num_steps_trained: 378000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,378,5368.76,378000,-9.8674,-8.38,-10,986.74


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 379000
  custom_metrics: {}
  date: 2021-10-23_21-29-36
  done: false
  episode_len_mean: 986.74
  episode_media: {}
  episode_reward_max: -8.379999999999866
  episode_reward_mean: -9.867399999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 458
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1253601061416024e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.40410787661870323
          entropy_coeff: 0.009999999999999998
          kl: 0.23984136225448716
          policy_loss: -0.052203430587218866
          total_loss: -0.04727507186018758
          vf_explained_var: -0.4296970069408417
          vf_loss: 0.008969440450891852
    num_agent_steps_sampled: 379000
    num_agent_steps_trained: 379000
    num_steps_sampled: 379000
    num_steps_trained: 37900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,379,5380.68,379000,-9.8674,-8.38,-10,986.74


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 380000
  custom_metrics: {}
  date: 2021-10-23_21-29-47
  done: false
  episode_len_mean: 986.44
  episode_media: {}
  episode_reward_max: -8.379999999999866
  episode_reward_mean: -9.864399999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 459
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.688040159212404e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.42919884704881245
          entropy_coeff: 0.009999999999999998
          kl: 0.2962718484405842
          policy_loss: -0.033834432603584395
          total_loss: -0.03046482110189067
          vf_explained_var: 0.37813958525657654
          vf_loss: 0.00766160039541622
    num_agent_steps_sampled: 380000
    num_agent_steps_trained: 380000
    num_steps_sampled: 380000
    num_steps_trained: 380000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,380,5391.98,380000,-9.8644,-8.38,-10,986.44


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 381000
  custom_metrics: {}
  date: 2021-10-23_21-29-58
  done: false
  episode_len_mean: 987.38
  episode_media: {}
  episode_reward_max: -8.379999999999866
  episode_reward_mean: -9.873799999999834
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 460
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5320602388186046e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.49997733698950875
          entropy_coeff: 0.009999999999999998
          kl: 0.39471692674689823
          policy_loss: -0.13065526050825912
          total_loss: -0.12388403974473476
          vf_explained_var: -0.07273885607719421
          vf_loss: 0.011770995530403323
    num_agent_steps_sampled: 381000
    num_agent_steps_trained: 381000
    num_steps_sampled: 381000
    num_steps_trained: 38100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,381,5402.57,381000,-9.8738,-8.38,-10,987.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 382000
  custom_metrics: {}
  date: 2021-10-23_21-30-17
  done: false
  episode_len_mean: 980.77
  episode_media: {}
  episode_reward_max: -4.789999999999942
  episode_reward_mean: -9.807699999999835
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 462
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.798090358227909e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.8120706402593189
          entropy_coeff: 0.009999999999999998
          kl: 0.32037286649768554
          policy_loss: -0.09557969503932529
          total_loss: -0.09115266087982389
          vf_explained_var: 0.30702799558639526
          vf_loss: 0.012547739126926496
    num_agent_steps_sampled: 382000
    num_agent_steps_trained: 382000
    num_steps_sampled: 382000
    num_steps_trained: 382000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,382,5422.32,382000,-9.8077,-4.79,-10,980.77


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 383000
  custom_metrics: {}
  date: 2021-10-23_21-30-34
  done: false
  episode_len_mean: 972.17
  episode_media: {}
  episode_reward_max: -4.789999999999942
  episode_reward_mean: -9.721699999999839
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 464
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.697135537341864e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.9925062603420681
          entropy_coeff: 0.009999999999999998
          kl: 0.21911091481645903
          policy_loss: 0.0051825011356009375
          total_loss: 0.005025614301363627
          vf_explained_var: 0.4695398509502411
          vf_loss: 0.00976817364183565
    num_agent_steps_sampled: 383000
    num_agent_steps_trained: 383000
    num_steps_sampled: 383000
    num_steps_trained: 383000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,383,5439.28,383000,-9.7217,-4.79,-10,972.17


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 384000
  custom_metrics: {}
  date: 2021-10-23_21-30-45
  done: false
  episode_len_mean: 971.34
  episode_media: {}
  episode_reward_max: -4.789999999999942
  episode_reward_mean: -9.713399999999838
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 465
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.545703306012791e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.5608843429221048
          entropy_coeff: 0.009999999999999998
          kl: 0.11596835741980208
          policy_loss: -0.0025604939709107082
          total_loss: -0.0037251000706520344
          vf_explained_var: 0.2370949387550354
          vf_loss: 0.0044442355969092915
    num_agent_steps_sampled: 384000
    num_agent_steps_trained: 384000
    num_steps_sampled: 384000
    num_steps_trained: 3840

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,384,5450.2,384000,-9.7134,-4.79,-10,971.34


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 385000
  custom_metrics: {}
  date: 2021-10-23_21-30-57
  done: false
  episode_len_mean: 971.31
  episode_media: {}
  episode_reward_max: -4.789999999999942
  episode_reward_mean: -9.713099999999837
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 466
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.281855495901919e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.4158596419625812
          entropy_coeff: 0.009999999999999998
          kl: 0.07841386083099577
          policy_loss: -0.04491131984525257
          total_loss: -0.042460324698024327
          vf_explained_var: -0.6690917015075684
          vf_loss: 0.006609591185244628
    num_agent_steps_sampled: 385000
    num_agent_steps_trained: 385000
    num_steps_sampled: 385000
    num_steps_trained: 385000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,385,5461.58,385000,-9.7131,-4.79,-10,971.31


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 386000
  custom_metrics: {}
  date: 2021-10-23_21-31-07
  done: false
  episode_len_mean: 971.28
  episode_media: {}
  episode_reward_max: -4.789999999999942
  episode_reward_mean: -9.712799999999838
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 467
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9227832438528782e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.5146048737896813
          entropy_coeff: 0.009999999999999998
          kl: 0.1915598133371936
          policy_loss: -0.08242779539691078
          total_loss: -0.07446986283693048
          vf_explained_var: 0.5911112427711487
          vf_loss: 0.013103980286460783
    num_agent_steps_sampled: 386000
    num_agent_steps_trained: 386000
    num_steps_sampled: 386000
    num_steps_trained: 386000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,386,5471.97,386000,-9.7128,-4.79,-10,971.28


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 387000
  custom_metrics: {}
  date: 2021-10-23_21-31-21
  done: false
  episode_len_mean: 969.44
  episode_media: {}
  episode_reward_max: -4.789999999999942
  episode_reward_mean: -9.694399999999836
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 468
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.884174865779317e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.7176761501365237
          entropy_coeff: 0.009999999999999998
          kl: 0.13759987300468815
          policy_loss: -0.021983951412969164
          total_loss: -0.0210764160586728
          vf_explained_var: 0.5806887149810791
          vf_loss: 0.008084300046579704
    num_agent_steps_sampled: 387000
    num_agent_steps_trained: 387000
    num_steps_sampled: 387000
    num_steps_trained: 387000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,387,5486.08,387000,-9.6944,-4.79,-10,969.44


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 388000
  custom_metrics: {}
  date: 2021-10-23_21-31-33
  done: false
  episode_len_mean: 966.7
  episode_media: {}
  episode_reward_max: -4.789999999999942
  episode_reward_mean: -9.666999999999838
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 469
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.326262298668977e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.7935304515891605
          entropy_coeff: 0.009999999999999998
          kl: 0.41886652563181187
          policy_loss: -0.17999101794428296
          total_loss: -0.17776594385504724
          vf_explained_var: 0.05165604129433632
          vf_loss: 0.01016038072993979
    num_agent_steps_sampled: 388000
    num_agent_steps_trained: 388000
    num_steps_sampled: 388000
    num_steps_trained: 388000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,388,5497.8,388000,-9.667,-4.79,-10,966.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 389000
  custom_metrics: {}
  date: 2021-10-23_21-31-49
  done: false
  episode_len_mean: 961.61
  episode_media: {}
  episode_reward_max: -4.789999999999942
  episode_reward_mean: -9.61609999999984
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 471
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.489393448003464e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.8833931081824833
          entropy_coeff: 0.009999999999999998
          kl: 0.19362883327735794
          policy_loss: -0.04723022911283705
          total_loss: -0.04412556919786665
          vf_explained_var: 0.3384553790092468
          vf_loss: 0.011938594235107302
    num_agent_steps_sampled: 389000
    num_agent_steps_trained: 389000
    num_steps_sampled: 389000
    num_steps_trained: 389000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,389,5513.62,389000,-9.6161,-4.79,-10,961.61


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 390000
  custom_metrics: {}
  date: 2021-10-23_21-32-07
  done: false
  episode_len_mean: 953.13
  episode_media: {}
  episode_reward_max: -4.789999999999942
  episode_reward_mean: -9.531299999999842
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 473
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.734090172005194e-28
          cur_lr: 5.000000000000001e-05
          entropy: 1.003990833626853
          entropy_coeff: 0.009999999999999998
          kl: 0.04300861673222648
          policy_loss: 0.016840215855174593
          total_loss: 0.012157317996025086
          vf_explained_var: -0.14191976189613342
          vf_loss: 0.005357013650548955
    num_agent_steps_sampled: 390000
    num_agent_steps_trained: 390000
    num_steps_sampled: 390000
    num_steps_trained: 390000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,390,5531.66,390000,-9.5313,-4.79,-10,953.13


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 391000
  custom_metrics: {}
  date: 2021-10-23_21-32-26
  done: false
  episode_len_mean: 943.04
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -9.430399999999844
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 475
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4601135258007795e-27
          cur_lr: 5.000000000000001e-05
          entropy: 1.1473001228438484
          entropy_coeff: 0.009999999999999998
          kl: 0.2399057608925634
          policy_loss: 0.010539930727746751
          total_loss: 0.003476869066556295
          vf_explained_var: -0.6229202151298523
          vf_loss: 0.004409938572078115
    num_agent_steps_sampled: 391000
    num_agent_steps_trained: 391000
    num_steps_sampled: 391000
    num_steps_trained: 391000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,391,5551.13,391000,-9.4304,-4.58,-10,943.04


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 392000
  custom_metrics: {}
  date: 2021-10-23_21-32-45
  done: false
  episode_len_mean: 933.24
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -9.332399999999845
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 477
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1901702887011698e-27
          cur_lr: 5.000000000000001e-05
          entropy: 1.0284973118040297
          entropy_coeff: 0.009999999999999998
          kl: 0.3127996661596828
          policy_loss: 0.08014908019039366
          total_loss: 0.07486631042427487
          vf_explained_var: -0.6067160367965698
          vf_loss: 0.005002202911095487
    num_agent_steps_sampled: 392000
    num_agent_steps_trained: 392000
    num_steps_sampled: 392000
    num_steps_trained: 392000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,392,5569.39,392000,-9.3324,-4.58,-10,933.24


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 393000
  custom_metrics: {}
  date: 2021-10-23_21-32-59
  done: false
  episode_len_mean: 929.86
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -9.298599999999846
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 478
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.285255433051753e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.978116676542494
          entropy_coeff: 0.009999999999999998
          kl: 0.17959403693675996
          policy_loss: -0.020114105112022825
          total_loss: -0.023762020717064538
          vf_explained_var: -0.08396162837743759
          vf_loss: 0.006133249262347817
    num_agent_steps_sampled: 393000
    num_agent_steps_trained: 393000
    num_steps_sampled: 393000
    num_steps_trained: 393000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,393,5583.61,393000,-9.2986,-4.58,-10,929.86




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 394000
  custom_metrics: {}
  date: 2021-10-23_21-33-32
  done: false
  episode_len_mean: 922.7
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -9.226999999999848
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 480
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.927883149577632e-27
          cur_lr: 5.000000000000001e-05
          entropy: 1.0083540081977844
          entropy_coeff: 0.009999999999999998
          kl: 0.19457940740717783
          policy_loss: -0.05414706642429034
          total_loss: -0.05483934308091799
          vf_explained_var: -0.21520839631557465
          vf_loss: 0.009391264833458183
    num_agent_steps_sampled: 394000
    num_agent_steps_trained: 394000
    num_steps_sampled: 394000
    num_steps_trained: 394000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,394,5616.47,394000,-9.227,-4.58,-10,922.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 395000
  custom_metrics: {}
  date: 2021-10-23_21-33-48
  done: false
  episode_len_mean: 919.75
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -9.197499999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 481
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.391824724366448e-27
          cur_lr: 5.000000000000001e-05
          entropy: 1.0603309167755974
          entropy_coeff: 0.009999999999999998
          kl: 0.053070771187129946
          policy_loss: -0.06128545295861032
          total_loss: -0.06549616075224346
          vf_explained_var: -0.5753896832466125
          vf_loss: 0.006392601043141137
    num_agent_steps_sampled: 395000
    num_agent_steps_trained: 395000
    num_steps_sampled: 395000
    num_steps_trained: 395000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,395,5632.37,395000,-9.1975,-4.58,-10,919.75


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 396000
  custom_metrics: {}
  date: 2021-10-23_21-34-03
  done: false
  episode_len_mean: 912.14
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -9.12139999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 483
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1087737086549671e-26
          cur_lr: 5.000000000000001e-05
          entropy: 1.1518607881334093
          entropy_coeff: 0.009999999999999998
          kl: 0.04530597331209315
          policy_loss: -0.10728318724367354
          total_loss: -0.10702786942323049
          vf_explained_var: -0.23947419226169586
          vf_loss: 0.011773924796014197
    num_agent_steps_sampled: 396000
    num_agent_steps_trained: 396000
    num_steps_sampled: 396000
    num_steps_trained: 396000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,396,5647.73,396000,-9.1214,-4.58,-10,912.14


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 397000
  custom_metrics: {}
  date: 2021-10-23_21-34-16
  done: false
  episode_len_mean: 908.72
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -9.08719999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 484
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6631605629824508e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.9390241530206468
          entropy_coeff: 0.009999999999999998
          kl: 0.07879995941701863
          policy_loss: -0.08839435618784693
          total_loss: -0.09221246010727352
          vf_explained_var: -0.5081087946891785
          vf_loss: 0.005572138640046534
    num_agent_steps_sampled: 397000
    num_agent_steps_trained: 397000
    num_steps_sampled: 397000
    num_steps_trained: 397000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,397,5660.34,397000,-9.0872,-4.58,-10,908.72


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 398000
  custom_metrics: {}
  date: 2021-10-23_21-34-31
  done: false
  episode_len_mean: 902.59
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -9.02589999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 486
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4947408444736753e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.8633754200405545
          entropy_coeff: 0.009999999999999998
          kl: 0.05444969328948193
          policy_loss: -0.08460652911000782
          total_loss: -0.08371942672464583
          vf_explained_var: 0.0351545587182045
          vf_loss: 0.00952085714476804
    num_agent_steps_sampled: 398000
    num_agent_steps_trained: 398000
    num_steps_sampled: 398000
    num_steps_trained: 398000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,398,5675.9,398000,-9.0259,-4.58,-10,902.59


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 399000
  custom_metrics: {}
  date: 2021-10-23_21-34-46
  done: false
  episode_len_mean: 898.62
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -8.986199999999853
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 487
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.742111266710515e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.8681196106804742
          entropy_coeff: 0.009999999999999998
          kl: 0.05035137821816736
          policy_loss: -0.06225187240375413
          total_loss: -0.06563821385304133
          vf_explained_var: -0.44452187418937683
          vf_loss: 0.005294855715086063
    num_agent_steps_sampled: 399000
    num_agent_steps_trained: 399000
    num_steps_sampled: 399000
    num_steps_trained: 399000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,399,5690.43,399000,-8.9862,-4.58,-10,898.62


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 400000
  custom_metrics: {}
  date: 2021-10-23_21-35-02
  done: false
  episode_len_mean: 891.86
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -8.918599999999854
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 489
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.61316690006577e-26
          cur_lr: 5.000000000000001e-05
          entropy: 1.1356463564766779
          entropy_coeff: 0.009999999999999998
          kl: 0.03708063680678606
          policy_loss: -0.08921968522999021
          total_loss: -0.08953089945846134
          vf_explained_var: -0.06974688172340393
          vf_loss: 0.011045242739944823
    num_agent_steps_sampled: 400000
    num_agent_steps_trained: 400000
    num_steps_sampled: 400000
    num_steps_trained: 400000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,400,5706.15,400000,-8.9186,-4.58,-10,891.86


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 401000
  custom_metrics: {}
  date: 2021-10-23_21-35-19
  done: false
  episode_len_mean: 884.54
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -8.845399999999856
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 491
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.419750350098655e-26
          cur_lr: 5.000000000000001e-05
          entropy: 1.2021245366997189
          entropy_coeff: 0.009999999999999998
          kl: 0.03312045672080583
          policy_loss: 0.01927801387177573
          total_loss: 0.0128396799787879
          vf_explained_var: 0.03946244344115257
          vf_loss: 0.005582909376567437
    num_agent_steps_sampled: 401000
    num_agent_steps_trained: 401000
    num_steps_sampled: 401000
    num_steps_trained: 401000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,401,5723.52,401000,-8.8454,-4.58,-10,884.54


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 402000
  custom_metrics: {}
  date: 2021-10-23_21-35-35
  done: false
  episode_len_mean: 880.89
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -8.808899999999857
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 492
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2629625525147983e-25
          cur_lr: 5.000000000000001e-05
          entropy: 1.132877438598209
          entropy_coeff: 0.009999999999999998
          kl: 0.08232842888683081
          policy_loss: -0.06302766559852493
          total_loss: -0.06682791047626072
          vf_explained_var: -0.573884129524231
          vf_loss: 0.007528532580989931
    num_agent_steps_sampled: 402000
    num_agent_steps_trained: 402000
    num_steps_sampled: 402000
    num_steps_trained: 402000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,402,5739.9,402000,-8.8089,-4.58,-10,880.89


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 403000
  custom_metrics: {}
  date: 2021-10-23_21-35-57
  done: false
  episode_len_mean: 871.88
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -8.71879999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 494
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8944438287721982e-25
          cur_lr: 5.000000000000001e-05
          entropy: 1.2483275413513184
          entropy_coeff: 0.009999999999999998
          kl: 0.1139152329829004
          policy_loss: -0.0780064683407545
          total_loss: -0.08496037357383304
          vf_explained_var: 0.7700912356376648
          vf_loss: 0.005529367101068298
    num_agent_steps_sampled: 403000
    num_agent_steps_trained: 403000
    num_steps_sampled: 403000
    num_steps_trained: 403000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,403,5761.54,403000,-8.7188,-4.58,-10,871.88


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 404000
  custom_metrics: {}
  date: 2021-10-23_21-36-16
  done: false
  episode_len_mean: 862.24
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -8.622399999999862
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 496
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.841665743158296e-25
          cur_lr: 5.000000000000001e-05
          entropy: 1.2516252517700195
          entropy_coeff: 0.009999999999999998
          kl: 0.10547947982947031
          policy_loss: -0.04609013241198328
          total_loss: -0.049046836958991154
          vf_explained_var: 0.2765301764011383
          vf_loss: 0.009559546889633768
    num_agent_steps_sampled: 404000
    num_agent_steps_trained: 404000
    num_steps_sampled: 404000
    num_steps_trained: 404000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,404,5780.55,404000,-8.6224,-4.58,-10,862.24


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 405000
  custom_metrics: {}
  date: 2021-10-23_21-36-35
  done: false
  episode_len_mean: 853.37
  episode_media: {}
  episode_reward_max: -4.579999999999947
  episode_reward_mean: -8.533699999999863
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 498
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.262498614737446e-25
          cur_lr: 5.000000000000001e-05
          entropy: 1.2089466624789769
          entropy_coeff: 0.009999999999999998
          kl: 0.13686238817042776
          policy_loss: -0.012920449674129485
          total_loss: -0.012829960882663726
          vf_explained_var: 0.6090067028999329
          vf_loss: 0.012179951126583748
    num_agent_steps_sampled: 405000
    num_agent_steps_trained: 405000
    num_steps_sampled: 405000
    num_steps_trained: 405000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,405,5799.22,405000,-8.5337,-4.58,-10,853.37


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 406000
  custom_metrics: {}
  date: 2021-10-23_21-36-54
  done: false
  episode_len_mean: 843.58
  episode_media: {}
  episode_reward_max: -4.43999999999995
  episode_reward_mean: -8.435799999999864
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 500
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.393747922106168e-25
          cur_lr: 5.000000000000001e-05
          entropy: 1.2543205128775703
          entropy_coeff: 0.009999999999999998
          kl: 0.1582715523739656
          policy_loss: -0.06296436753537919
          total_loss: -0.06725905653503206
          vf_explained_var: 0.5786135792732239
          vf_loss: 0.008248513277309636
    num_agent_steps_sampled: 406000
    num_agent_steps_trained: 406000
    num_steps_sampled: 406000
    num_steps_trained: 406000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,406,5818.88,406000,-8.4358,-4.44,-10,843.58


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 407000
  custom_metrics: {}
  date: 2021-10-23_21-37-12
  done: false
  episode_len_mean: 834.57
  episode_media: {}
  episode_reward_max: -4.43999999999995
  episode_reward_mean: -8.345699999999866
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 502
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.590621883159251e-25
          cur_lr: 5.000000000000001e-05
          entropy: 1.1754307548205059
          entropy_coeff: 0.009999999999999998
          kl: 0.12291219284137091
          policy_loss: 0.1016868752323919
          total_loss: 0.09593445944289367
          vf_explained_var: -0.22929096221923828
          vf_loss: 0.006001893958697716
    num_agent_steps_sampled: 407000
    num_agent_steps_trained: 407000
    num_steps_sampled: 407000
    num_steps_trained: 407000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,407,5836.35,407000,-8.3457,-4.44,-10,834.57


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 408000
  custom_metrics: {}
  date: 2021-10-23_21-37-29
  done: false
  episode_len_mean: 826.59
  episode_media: {}
  episode_reward_max: -4.43999999999995
  episode_reward_mean: -8.265899999999869
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 504
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4385932824738881e-24
          cur_lr: 5.000000000000001e-05
          entropy: 1.2713015768263074
          entropy_coeff: 0.009999999999999998
          kl: 0.14757774774399068
          policy_loss: 0.07447380249698957
          total_loss: 0.06950472088323699
          vf_explained_var: 0.026818765327334404
          vf_loss: 0.007743934650594989
    num_agent_steps_sampled: 408000
    num_agent_steps_trained: 408000
    num_steps_sampled: 408000
    num_steps_trained: 408000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,408,5852.91,408000,-8.2659,-4.44,-10,826.59


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 409000
  custom_metrics: {}
  date: 2021-10-23_21-37-50
  done: false
  episode_len_mean: 815.53
  episode_media: {}
  episode_reward_max: -3.899999999999961
  episode_reward_mean: -8.155299999999873
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 506
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1578899237108317e-24
          cur_lr: 5.000000000000001e-05
          entropy: 1.200395245022244
          entropy_coeff: 0.009999999999999998
          kl: 0.15016973581578996
          policy_loss: 0.06817341695229212
          total_loss: 0.06324673543373743
          vf_explained_var: -0.01242312602698803
          vf_loss: 0.007077273400500417
    num_agent_steps_sampled: 409000
    num_agent_steps_trained: 409000
    num_steps_sampled: 409000
    num_steps_trained: 409000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,409,5874.56,409000,-8.1553,-3.9,-10,815.53


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 410000
  custom_metrics: {}
  date: 2021-10-23_21-38-08
  done: false
  episode_len_mean: 811.6
  episode_media: {}
  episode_reward_max: -3.899999999999961
  episode_reward_mean: -8.115999999999874
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 507
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.236834885566248e-24
          cur_lr: 5.000000000000001e-05
          entropy: 1.1752939701080323
          entropy_coeff: 0.009999999999999998
          kl: 0.29096103492710323
          policy_loss: -0.06516267011562983
          total_loss: -0.06715781564513842
          vf_explained_var: -0.3131518065929413
          vf_loss: 0.009757792168400354
    num_agent_steps_sampled: 410000
    num_agent_steps_trained: 410000
    num_steps_sampled: 410000
    num_steps_trained: 410000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,410,5892.64,410000,-8.116,-3.9,-10,811.6


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 411000
  custom_metrics: {}
  date: 2021-10-23_21-38-26
  done: false
  episode_len_mean: 803.03
  episode_media: {}
  episode_reward_max: -3.899999999999961
  episode_reward_mean: -8.030299999999874
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 509
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.855252328349372e-24
          cur_lr: 5.000000000000001e-05
          entropy: 1.2320873333348168
          entropy_coeff: 0.009999999999999998
          kl: 0.5272997704644998
          policy_loss: -0.04500755773236354
          total_loss: -0.03688451763656404
          vf_explained_var: 0.12378665804862976
          vf_loss: 0.020443914952273996
    num_agent_steps_sampled: 411000
    num_agent_steps_trained: 411000
    num_steps_sampled: 411000
    num_steps_trained: 411000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,411,5910.16,411000,-8.0303,-3.9,-10,803.03




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 412000
  custom_metrics: {}
  date: 2021-10-23_21-38-57
  done: false
  episode_len_mean: 799.86
  episode_media: {}
  episode_reward_max: -3.899999999999961
  episode_reward_mean: -7.998599999999874
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 510
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.282878492524056e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.8658371455139584
          entropy_coeff: 0.009999999999999998
          kl: 0.21098481809927358
          policy_loss: -0.03368275939590401
          total_loss: -0.03504613120522764
          vf_explained_var: -0.2614554762840271
          vf_loss: 0.007295002188119623
    num_agent_steps_sampled: 412000
    num_agent_steps_trained: 412000
    num_steps_sampled: 412000
    num_steps_trained: 412000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,412,5941.05,412000,-7.9986,-3.9,-10,799.86


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 413000
  custom_metrics: {}
  date: 2021-10-23_21-39-09
  done: false
  episode_len_mean: 795.68
  episode_media: {}
  episode_reward_max: -3.899999999999961
  episode_reward_mean: -7.956799999999874
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 512
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0924317738786084e-23
          cur_lr: 5.000000000000001e-05
          entropy: 1.0330025341775682
          entropy_coeff: 0.009999999999999998
          kl: 0.4629894607596927
          policy_loss: 0.06695364481872983
          total_loss: 0.06327420754565133
          vf_explained_var: -0.36470159888267517
          vf_loss: 0.006650586056316065
    num_agent_steps_sampled: 413000
    num_agent_steps_trained: 413000
    num_steps_sampled: 413000
    num_steps_trained: 413000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,413,5953.27,413000,-7.9568,-3.9,-10,795.68


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 414000
  custom_metrics: {}
  date: 2021-10-23_21-39-23
  done: false
  episode_len_mean: 794.18
  episode_media: {}
  episode_reward_max: -3.899999999999961
  episode_reward_mean: -7.941799999999874
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 513
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6386476608179121e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.8190982616609997
          entropy_coeff: 0.009999999999999998
          kl: 0.48857246744963856
          policy_loss: -0.0478495042771101
          total_loss: -0.0488751660204596
          vf_explained_var: -0.0370672233402729
          vf_loss: 0.007165319695033961
    num_agent_steps_sampled: 414000
    num_agent_steps_trained: 414000
    num_steps_sampled: 414000
    num_steps_trained: 414000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,414,5967.16,414000,-7.9418,-3.9,-10,794.18


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 415000
  custom_metrics: {}
  date: 2021-10-23_21-39-37
  done: false
  episode_len_mean: 791.39
  episode_media: {}
  episode_reward_max: -3.899999999999961
  episode_reward_mean: -7.913899999999875
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 514
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4579714912268678e-23
          cur_lr: 5.000000000000001e-05
          entropy: 1.009163631995519
          entropy_coeff: 0.009999999999999998
          kl: 0.4241669354753362
          policy_loss: 0.040771545469760896
          total_loss: 0.037418906225098504
          vf_explained_var: -0.2163882553577423
          vf_loss: 0.0067389972507953646
    num_agent_steps_sampled: 415000
    num_agent_steps_trained: 415000
    num_steps_sampled: 415000
    num_steps_trained: 415000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,415,5980.94,415000,-7.9139,-3.9,-10,791.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 416000
  custom_metrics: {}
  date: 2021-10-23_21-39-52
  done: false
  episode_len_mean: 784.81
  episode_media: {}
  episode_reward_max: -3.899999999999961
  episode_reward_mean: -7.848099999999877
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 516
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6869572368403024e-23
          cur_lr: 5.000000000000001e-05
          entropy: 1.0818138407336342
          entropy_coeff: 0.009999999999999998
          kl: 0.38344470726119145
          policy_loss: 0.07176901143458154
          total_loss: 0.07025376868744691
          vf_explained_var: 0.15176540613174438
          vf_loss: 0.009302892909747444
    num_agent_steps_sampled: 416000
    num_agent_steps_trained: 416000
    num_steps_sampled: 416000
    num_steps_trained: 416000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,416,5996.08,416000,-7.8481,-3.9,-10,784.81


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 417000
  custom_metrics: {}
  date: 2021-10-23_21-40-10
  done: false
  episode_len_mean: 774.31
  episode_media: {}
  episode_reward_max: -3.6399999999999664
  episode_reward_mean: -7.743099999999879
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 518
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.530435855260453e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.9679991086324056
          entropy_coeff: 0.009999999999999998
          kl: 0.95210073871745
          policy_loss: -0.02710863442884551
          total_loss: -0.029765016088883083
          vf_explained_var: 0.15714344382286072
          vf_loss: 0.0070236100226692445
    num_agent_steps_sampled: 417000
    num_agent_steps_trained: 417000
    num_steps_sampled: 417000
    num_steps_trained: 417000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,417,6014.03,417000,-7.7431,-3.64,-10,774.31


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 418000
  custom_metrics: {}
  date: 2021-10-23_21-40-27
  done: false
  episode_len_mean: 767.13
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.6712999999998805
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 520
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.295653782890682e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.8784824477301704
          entropy_coeff: 0.009999999999999998
          kl: 0.664046448510554
          policy_loss: 0.021985360897249645
          total_loss: 0.022781854785150952
          vf_explained_var: -0.302073210477829
          vf_loss: 0.009581318681982035
    num_agent_steps_sampled: 418000
    num_agent_steps_trained: 418000
    num_steps_sampled: 418000
    num_steps_trained: 418000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,418,6030.75,418000,-7.6713,-3.21,-10,767.13


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 419000
  custom_metrics: {}
  date: 2021-10-23_21-40-38
  done: false
  episode_len_mean: 766.71
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.66709999999988
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 521
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2443480674336021e-22
          cur_lr: 5.000000000000001e-05
          entropy: 1.213593751192093
          entropy_coeff: 0.009999999999999998
          kl: 0.8538363860713111
          policy_loss: 0.07700225890924534
          total_loss: 0.06651283734374576
          vf_explained_var: -0.9228380918502808
          vf_loss: 0.0016465157215457616
    num_agent_steps_sampled: 419000
    num_agent_steps_trained: 419000
    num_steps_sampled: 419000
    num_steps_trained: 419000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,419,6041.81,419000,-7.6671,-3.21,-10,766.71


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 420000
  custom_metrics: {}
  date: 2021-10-23_21-40-55
  done: false
  episode_len_mean: 756.25
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.562499999999883
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 523
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.866522101150403e-22
          cur_lr: 5.000000000000001e-05
          entropy: 1.4733375284406873
          entropy_coeff: 0.009999999999999998
          kl: 0.9686434128218226
          policy_loss: 0.07225566605726878
          total_loss: 0.06863327738311556
          vf_explained_var: 0.7535776495933533
          vf_loss: 0.011110988797412978
    num_agent_steps_sampled: 420000
    num_agent_steps_trained: 420000
    num_steps_sampled: 420000
    num_steps_trained: 420000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,420,6059.52,420000,-7.5625,-3.21,-10,756.25


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 421000
  custom_metrics: {}
  date: 2021-10-23_21-41-06
  done: false
  episode_len_mean: 755.63
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.556299999999884
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 524
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7997831517256047e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.8301320261425442
          entropy_coeff: 0.009999999999999998
          kl: 0.5241102125909594
          policy_loss: -0.06172791322072347
          total_loss: -0.06048678664697541
          vf_explained_var: -0.636086642742157
          vf_loss: 0.009542448293197796
    num_agent_steps_sampled: 421000
    num_agent_steps_trained: 421000
    num_steps_sampled: 421000
    num_steps_trained: 421000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,421,6070.03,421000,-7.5563,-3.21,-10,755.63


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 422000
  custom_metrics: {}
  date: 2021-10-23_21-41-19
  done: false
  episode_len_mean: 753.54
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.535399999999883
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 525
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.199674727588407e-22
          cur_lr: 5.000000000000001e-05
          entropy: 1.1952927364243402
          entropy_coeff: 0.009999999999999998
          kl: 3.295489862230089
          policy_loss: 0.07760069051550494
          total_loss: 0.08266703486442566
          vf_explained_var: 0.5660259127616882
          vf_loss: 0.01701926851593372
    num_agent_steps_sampled: 422000
    num_agent_steps_trained: 422000
    num_steps_sampled: 422000
    num_steps_trained: 422000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,422,6082.73,422000,-7.5354,-3.21,-10,753.54


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 423000
  custom_metrics: {}
  date: 2021-10-23_21-41-35
  done: false
  episode_len_mean: 747.64
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.476399999999884
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 527
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.299512091382611e-22
          cur_lr: 5.000000000000001e-05
          entropy: 1.060828482773569
          entropy_coeff: 0.009999999999999998
          kl: 0.35420309826731683
          policy_loss: 0.20191524831785096
          total_loss: 0.19819725222057766
          vf_explained_var: 0.22027087211608887
          vf_loss: 0.00689029137008927
    num_agent_steps_sampled: 423000
    num_agent_steps_trained: 423000
    num_steps_sampled: 423000
    num_steps_trained: 423000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,423,6099.42,423000,-7.4764,-3.21,-10,747.64


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 424000
  custom_metrics: {}
  date: 2021-10-23_21-41-52
  done: false
  episode_len_mean: 744.88
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.448799999999887
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 528
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.449268137073918e-22
          cur_lr: 5.000000000000001e-05
          entropy: 1.3445710059669282
          entropy_coeff: 0.009999999999999998
          kl: 0.47823237006862956
          policy_loss: -0.08805711484617657
          total_loss: -0.0982387180129687
          vf_explained_var: -0.39848700165748596
          vf_loss: 0.0032641053387326086
    num_agent_steps_sampled: 424000
    num_agent_steps_trained: 424000
    num_steps_sampled: 424000
    num_steps_trained: 424000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,424,6115.48,424000,-7.4488,-3.21,-10,744.88


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 425000
  custom_metrics: {}
  date: 2021-10-23_21-42-13
  done: false
  episode_len_mean: 738.08
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.380799999999888
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 530
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.417390220561088e-21
          cur_lr: 5.000000000000001e-05
          entropy: 1.6240293092197842
          entropy_coeff: 0.009999999999999998
          kl: 0.31501320066033967
          policy_loss: 0.10187981476386389
          total_loss: 0.08963507049613528
          vf_explained_var: -0.3851725161075592
          vf_loss: 0.003995548012769885
    num_agent_steps_sampled: 425000
    num_agent_steps_trained: 425000
    num_steps_sampled: 425000
    num_steps_trained: 425000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,425,6137.21,425000,-7.3808,-3.21,-10,738.08


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 426000
  custom_metrics: {}
  date: 2021-10-23_21-42-31
  done: false
  episode_len_mean: 736.43
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.364299999999888
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 531
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1260853308416318e-21
          cur_lr: 5.000000000000001e-05
          entropy: 1.0163813511530557
          entropy_coeff: 0.009999999999999998
          kl: 0.41704618251985975
          policy_loss: -0.034400586452749043
          total_loss: -0.041403977490133706
          vf_explained_var: -0.36719271540641785
          vf_loss: 0.0031604196501171422
    num_agent_steps_sampled: 426000
    num_agent_steps_trained: 426000
    num_steps_sampled: 426000
    num_steps_trained: 42

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,426,6155.02,426000,-7.3643,-3.21,-10,736.43


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 427000
  custom_metrics: {}
  date: 2021-10-23_21-42-47
  done: false
  episode_len_mean: 736.47
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.364699999999886
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 532
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.189127996262447e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.5463438603613112
          entropy_coeff: 0.009999999999999998
          kl: 0.15025639086961745
          policy_loss: -0.019996101823118
          total_loss: -0.021494128472275203
          vf_explained_var: -0.4389767050743103
          vf_loss: 0.003965413017431274
    num_agent_steps_sampled: 427000
    num_agent_steps_trained: 427000
    num_steps_sampled: 427000
    num_steps_trained: 427000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,427,6171.34,427000,-7.3647,-3.21,-10,736.47


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 428000
  custom_metrics: {}
  date: 2021-10-23_21-43-02
  done: false
  episode_len_mean: 735.98
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.359799999999888
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 533
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.7836919943936715e-21
          cur_lr: 5.000000000000001e-05
          entropy: 1.3473673323790232
          entropy_coeff: 0.009999999999999998
          kl: 2.320147520966
          policy_loss: 0.07398734548025661
          total_loss: 0.06479082173771328
          vf_explained_var: -0.1504184901714325
          vf_loss: 0.004277148673362616
    num_agent_steps_sampled: 428000
    num_agent_steps_trained: 428000
    num_steps_sampled: 428000
    num_steps_trained: 428000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,428,6185.42,428000,-7.3598,-3.21,-10,735.98


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 429000
  custom_metrics: {}
  date: 2021-10-23_21-43-20
  done: false
  episode_len_mean: 733.45
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.33449999999989
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 534
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.175537991590505e-21
          cur_lr: 5.000000000000001e-05
          entropy: 1.0416513045628866
          entropy_coeff: 0.009999999999999998
          kl: 1.0356511620183786
          policy_loss: -0.013335031767686209
          total_loss: -0.020883969714244208
          vf_explained_var: -0.432820200920105
          vf_loss: 0.0028675783097343002
    num_agent_steps_sampled: 429000
    num_agent_steps_trained: 429000
    num_steps_sampled: 429000
    num_steps_trained: 429000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,429,6203.8,429000,-7.3345,-3.21,-10,733.45


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 430000
  custom_metrics: {}
  date: 2021-10-23_21-43-36
  done: false
  episode_len_mean: 733.8
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.337999999999889
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 535
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0763306987385763e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.4995675871769587
          entropy_coeff: 0.009999999999999998
          kl: 0.12702577023042572
          policy_loss: -0.1564088414526648
          total_loss: -0.15594837280611198
          vf_explained_var: -0.053500737994909286
          vf_loss: 0.005456144134708059
    num_agent_steps_sampled: 430000
    num_agent_steps_trained: 430000
    num_steps_sampled: 430000
    num_steps_trained: 430000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,430,6219.39,430000,-7.338,-3.21,-10,733.8


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 431000
  custom_metrics: {}
  date: 2021-10-23_21-43-54
  done: false
  episode_len_mean: 727.71
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.27709999999989
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 537
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.614496048107864e-20
          cur_lr: 5.000000000000001e-05
          entropy: 1.2121560593446097
          entropy_coeff: 0.009999999999999998
          kl: 0.8744975576798121
          policy_loss: -0.012351712418927087
          total_loss: -0.01738775567048126
          vf_explained_var: -0.05745340511202812
          vf_loss: 0.0070855189066302655
    num_agent_steps_sampled: 431000
    num_agent_steps_trained: 431000
    num_steps_sampled: 431000
    num_steps_trained: 431000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,431,6238.07,431000,-7.2771,-3.21,-10,727.71


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 432000
  custom_metrics: {}
  date: 2021-10-23_21-44-12
  done: false
  episode_len_mean: 724.54
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.245399999999889
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 538
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4217440721617957e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.8627615269687441
          entropy_coeff: 0.009999999999999998
          kl: 0.7312777136762937
          policy_loss: -0.009686043775743908
          total_loss: -0.014999149036076334
          vf_explained_var: -0.414726585149765
          vf_loss: 0.0033145116601695514
    num_agent_steps_sampled: 432000
    num_agent_steps_trained: 432000
    num_steps_sampled: 432000
    num_steps_trained: 43200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,432,6255.43,432000,-7.2454,-3.21,-10,724.54


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 433000
  custom_metrics: {}
  date: 2021-10-23_21-44-27
  done: false
  episode_len_mean: 724.87
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.248699999999891
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 539
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.632616108242694e-20
          cur_lr: 5.000000000000001e-05
          entropy: 1.0199997882048288
          entropy_coeff: 0.009999999999999998
          kl: 0.5116388852397601
          policy_loss: -0.07198907600508796
          total_loss: -0.07732482552528382
          vf_explained_var: -0.5237723588943481
          vf_loss: 0.004864243222335871
    num_agent_steps_sampled: 433000
    num_agent_steps_trained: 433000
    num_steps_sampled: 433000
    num_steps_trained: 433000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,433,6270.86,433000,-7.2487,-3.21,-10,724.87




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 434000
  custom_metrics: {}
  date: 2021-10-23_21-44-59
  done: false
  episode_len_mean: 723.59
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.235899999999891
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 540
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.448924162364038e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.8599173453119066
          entropy_coeff: 0.009999999999999998
          kl: 0.7734416484004921
          policy_loss: -0.062246546645959216
          total_loss: -0.06559251546859741
          vf_explained_var: -0.4106592833995819
          vf_loss: 0.005253203391233304
    num_agent_steps_sampled: 434000
    num_agent_steps_trained: 434000
    num_steps_sampled: 434000
    num_steps_trained: 434000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,434,6302.61,434000,-7.2359,-3.21,-10,723.59


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 435000
  custom_metrics: {}
  date: 2021-10-23_21-45-18
  done: false
  episode_len_mean: 719.86
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.198599999999891
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 542
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.173386243546062e-20
          cur_lr: 5.000000000000001e-05
          entropy: 1.2305773599280252
          entropy_coeff: 0.009999999999999998
          kl: 1.6467519126832486
          policy_loss: 0.11153971751530965
          total_loss: 0.10286133719815148
          vf_explained_var: -0.33613553643226624
          vf_loss: 0.0036273960910168375
    num_agent_steps_sampled: 435000
    num_agent_steps_trained: 435000
    num_steps_sampled: 435000
    num_steps_trained: 435000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,435,6321.54,435000,-7.1986,-3.21,-10,719.86


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 436000
  custom_metrics: {}
  date: 2021-10-23_21-45-34
  done: false
  episode_len_mean: 717.3
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.172999999999893
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 543
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2260079365319093e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.0981988979710473
          entropy_coeff: 0.009999999999999998
          kl: 1.4912757354478041
          policy_loss: 0.15403108596801757
          total_loss: 0.1451722072230445
          vf_explained_var: -0.27202853560447693
          vf_loss: 0.0021231077773134328
    num_agent_steps_sampled: 436000
    num_agent_steps_trained: 436000
    num_steps_sampled: 436000
    num_steps_trained: 436000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,436,6337.34,436000,-7.173,-3.21,-10,717.3


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 437000
  custom_metrics: {}
  date: 2021-10-23_21-45-48
  done: false
  episode_len_mean: 718.14
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.181399999999892
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 544
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8390119047978638e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.997617738114463
          entropy_coeff: 0.009999999999999998
          kl: 0.6962891793499391
          policy_loss: -0.06393699612882402
          total_loss: -0.06178969548394283
          vf_explained_var: -0.5704582929611206
          vf_loss: 0.012123476375644612
    num_agent_steps_sampled: 437000
    num_agent_steps_trained: 437000
    num_steps_sampled: 437000
    num_steps_trained: 437000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,437,6351.74,437000,-7.1814,-3.21,-10,718.14


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 438000
  custom_metrics: {}
  date: 2021-10-23_21-46-01
  done: false
  episode_len_mean: 718.06
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.180599999999893
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 545
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.758517857196795e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.5087389085027907
          entropy_coeff: 0.009999999999999998
          kl: 3.1174213588237762
          policy_loss: 0.04450126273764504
          total_loss: 0.033170035150316024
          vf_explained_var: -0.12163353711366653
          vf_loss: 0.0037561697012279184
    num_agent_steps_sampled: 438000
    num_agent_steps_trained: 438000
    num_steps_sampled: 438000
    num_steps_trained: 438000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,438,6364.87,438000,-7.1806,-3.21,-10,718.06


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 439000
  custom_metrics: {}
  date: 2021-10-23_21-46-17
  done: false
  episode_len_mean: 716.6
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.165999999999892
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 546
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.137776785795192e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.7573387157585886
          entropy_coeff: 0.009999999999999998
          kl: 0.8962075162265036
          policy_loss: 0.10497600336869557
          total_loss: 0.10150810711913638
          vf_explained_var: 0.12633875012397766
          vf_loss: 0.004105490054563335
    num_agent_steps_sampled: 439000
    num_agent_steps_trained: 439000
    num_steps_sampled: 439000
    num_steps_trained: 439000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,439,6380.5,439000,-7.166,-3.21,-10,716.6


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 440000
  custom_metrics: {}
  date: 2021-10-23_21-46-32
  done: false
  episode_len_mean: 716.65
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.166499999999894
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 547
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.20666517869279e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.9102070010370679
          entropy_coeff: 0.009999999999999998
          kl: 1.3265840051074822
          policy_loss: -0.12470053328822056
          total_loss: -0.13032808357642756
          vf_explained_var: 0.1604163944721222
          vf_loss: 0.0034745223412755875
    num_agent_steps_sampled: 440000
    num_agent_steps_trained: 440000
    num_steps_sampled: 440000
    num_steps_trained: 440000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,440,6395.85,440000,-7.1665,-3.21,-10,716.65


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 441000
  custom_metrics: {}
  date: 2021-10-23_21-46-48
  done: false
  episode_len_mean: 716.32
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.163199999999892
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 548
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.309997768039182e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.669628447956509
          entropy_coeff: 0.009999999999999998
          kl: 1.6885784321361117
          policy_loss: 0.0055149686005380415
          total_loss: -0.004374336120155122
          vf_explained_var: -0.14907914400100708
          vf_loss: 0.006806975102517754
    num_agent_steps_sampled: 441000
    num_agent_steps_trained: 441000
    num_steps_sampled: 441000
    num_steps_trained: 441000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,441,6411.87,441000,-7.1632,-3.21,-10,716.32


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 442000
  custom_metrics: {}
  date: 2021-10-23_21-47-06
  done: false
  episode_len_mean: 713.26
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.1325999999998935
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 550
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3964996652058776e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.3805041127734714
          entropy_coeff: 0.009999999999999998
          kl: 0.8243116005841229
          policy_loss: -0.0076292804545826385
          total_loss: -0.018301283485359615
          vf_explained_var: -0.029852572828531265
          vf_loss: 0.0031330353712999367
    num_agent_steps_sampled: 442000
    num_agent_steps_trained: 442000
    num_steps_sampled: 442000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,442,6429.2,442000,-7.1326,-3.21,-10,713.26


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 443000
  custom_metrics: {}
  date: 2021-10-23_21-47-26
  done: false
  episode_len_mean: 705.67
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.056699999999894
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 552
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0947494978088166e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.37699184483952
          entropy_coeff: 0.009999999999999998
          kl: 1.241032945447498
          policy_loss: 0.12745118770334457
          total_loss: 0.11842790891726812
          vf_explained_var: -0.5234364867210388
          vf_loss: 0.004746642091777176
    num_agent_steps_sampled: 443000
    num_agent_steps_trained: 443000
    num_steps_sampled: 443000
    num_steps_trained: 443000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,443,6449.23,443000,-7.0567,-3.21,-10,705.67


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 444000
  custom_metrics: {}
  date: 2021-10-23_21-47-46
  done: false
  episode_len_mean: 700.06
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.000599999999896
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 553
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1421242467132246e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.4437828242778779
          entropy_coeff: 0.009999999999999998
          kl: 0.8805141444007556
          policy_loss: 0.017715970675150555
          total_loss: 0.007179518540700276
          vf_explained_var: -0.2741047739982605
          vf_loss: 0.003901379918291544
    num_agent_steps_sampled: 444000
    num_agent_steps_trained: 444000
    num_steps_sampled: 444000
    num_steps_trained: 444000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,444,6469.44,444000,-7.0006,-3.21,-10,700.06


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 445000
  custom_metrics: {}
  date: 2021-10-23_21-48-07
  done: false
  episode_len_mean: 692.27
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.922699999999898
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 555
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.713186370069837e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.363820328977373
          entropy_coeff: 0.009999999999999998
          kl: 0.47593516326612895
          policy_loss: -0.051719312535391915
          total_loss: -0.0605123537282149
          vf_explained_var: 0.06399624794721603
          vf_loss: 0.004845168427628879
    num_agent_steps_sampled: 445000
    num_agent_steps_trained: 445000
    num_steps_sampled: 445000
    num_steps_trained: 445000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,445,6490.33,445000,-6.9227,-3.21,-10,692.27


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 446000
  custom_metrics: {}
  date: 2021-10-23_21-48-30
  done: false
  episode_len_mean: 675.26
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.752599999999901
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 3
  episodes_total: 558
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.069779555104754e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.6261100716061063
          entropy_coeff: 0.009999999999999998
          kl: 0.4875211726046271
          policy_loss: 0.11253472715616226
          total_loss: 0.10377987027168274
          vf_explained_var: -0.08800292760133743
          vf_loss: 0.007506243790137685
    num_agent_steps_sampled: 446000
    num_agent_steps_trained: 446000
    num_steps_sampled: 446000
    num_steps_trained: 446000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,446,6513.59,446000,-6.7526,-3.21,-10,675.26


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 447000
  custom_metrics: {}
  date: 2021-10-23_21-49-01
  done: false
  episode_len_mean: 663.65
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.636499999999903
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 560
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0604669332657136e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.773198095957438
          entropy_coeff: 0.009999999999999998
          kl: 0.22637561394108666
          policy_loss: -0.03797493879165914
          total_loss: -0.04917722584472762
          vf_explained_var: 0.5694513320922852
          vf_loss: 0.006529690111832073
    num_agent_steps_sampled: 447000
    num_agent_steps_trained: 447000
    num_steps_sampled: 447000
    num_steps_trained: 447000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,447,6544.1,447000,-6.6365,-3.21,-10,663.65


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 448000
  custom_metrics: {}
  date: 2021-10-23_21-49-31
  done: false
  episode_len_mean: 656.1
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.560999999999905
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 3
  episodes_total: 563
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5907003998985696e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.8520195470915901
          entropy_coeff: 0.009999999999999998
          kl: 0.04967074210031165
          policy_loss: -0.11502720324529542
          total_loss: -0.1285208850271172
          vf_explained_var: 0.7616545557975769
          vf_loss: 0.005026512197218835
    num_agent_steps_sampled: 448000
    num_agent_steps_trained: 448000
    num_steps_sampled: 448000
    num_steps_trained: 448000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,448,6574.41,448000,-6.561,-3.21,-10,656.1


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 449000
  custom_metrics: {}
  date: 2021-10-23_21-50-01
  done: false
  episode_len_mean: 641.39
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.413899999999909
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 3
  episodes_total: 566
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.386050599847855e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.973190148671468
          entropy_coeff: 0.009999999999999998
          kl: 0.347268515618311
          policy_loss: 0.07391476747062471
          total_loss: 0.057368238435851204
          vf_explained_var: 0.7762272953987122
          vf_loss: 0.0031853708166939515
    num_agent_steps_sampled: 449000
    num_agent_steps_trained: 449000
    num_steps_sampled: 449000
    num_steps_trained: 449000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,449,6604.52,449000,-6.4139,-3.21,-10,641.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 450000
  custom_metrics: {}
  date: 2021-10-23_21-50-22
  done: false
  episode_len_mean: 630.56
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.305599999999909
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 568
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.579075899771782e-17
          cur_lr: 5.000000000000001e-05
          entropy: 2.3317183507813346
          entropy_coeff: 0.009999999999999998
          kl: 1.0779258236082063
          policy_loss: 0.35742640958891975
          total_loss: 0.3970825256572829
          vf_explained_var: 0.030032724142074585
          vf_loss: 0.06297329934313893
    num_agent_steps_sampled: 450000
    num_agent_steps_trained: 450000
    num_steps_sampled: 450000
    num_steps_trained: 450000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,450,6624.84,450000,-6.3056,-3.21,-10,630.56


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 451000
  custom_metrics: {}
  date: 2021-10-23_21-50-36
  done: false
  episode_len_mean: 633.3
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.332999999999909
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 569
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.368613849657674e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.779407247569826
          entropy_coeff: 0.009999999999999998
          kl: 3.0689492967393663
          policy_loss: 0.11309901049826294
          total_loss: 0.34678775597777634
          vf_explained_var: -0.318978488445282
          vf_loss: 0.25148281709601483
    num_agent_steps_sampled: 451000
    num_agent_steps_trained: 451000
    num_steps_sampled: 451000
    num_steps_trained: 451000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,451,6638.83,451000,-6.333,-3.21,-10,633.3




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 452000
  custom_metrics: {}
  date: 2021-10-23_21-51-07
  done: false
  episode_len_mean: 632.16
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.3215999999999095
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 570
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.052920774486509e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.9724754050374032
          entropy_coeff: 0.009999999999999998
          kl: 1.601499573720826
          policy_loss: 0.00818317968191372
          total_loss: 0.14787313108228975
          vf_explained_var: -0.2202656865119934
          vf_loss: 0.14941470310505894
    num_agent_steps_sampled: 452000
    num_agent_steps_trained: 452000
    num_steps_sampled: 452000
    num_steps_trained: 452000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,452,6670.44,452000,-6.3216,-3.21,-10,632.16


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 453000
  custom_metrics: {}
  date: 2021-10-23_21-51-27
  done: false
  episode_len_mean: 634.45
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.344499999999909
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 571
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2079381161729765e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.6981335871749454
          entropy_coeff: 0.009999999999999998
          kl: 0.4838659044769075
          policy_loss: -0.0572672113776207
          total_loss: -0.038570971414446834
          vf_explained_var: -0.4277442991733551
          vf_loss: 0.025677572339514478
    num_agent_steps_sampled: 453000
    num_agent_steps_trained: 453000
    num_steps_sampled: 453000
    num_steps_trained: 453000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,453,6689.85,453000,-6.3445,-3.21,-10,634.45


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 454000
  custom_metrics: {}
  date: 2021-10-23_21-51-43
  done: false
  episode_len_mean: 640.3
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.402999999999909
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 573
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8119071742594646e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.6461037985152669
          entropy_coeff: 0.009999999999999998
          kl: 1.2169280303849115
          policy_loss: 0.02297526697317759
          total_loss: 0.05190157840649287
          vf_explained_var: -0.21864384412765503
          vf_loss: 0.035387349968206964
    num_agent_steps_sampled: 454000
    num_agent_steps_trained: 454000
    num_steps_sampled: 454000
    num_steps_trained: 454000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,454,6705.86,454000,-6.403,-3.21,-10,640.3


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 455000
  custom_metrics: {}
  date: 2021-10-23_21-51-57
  done: false
  episode_len_mean: 643.97
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.439699999999908
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 574
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.717860761389198e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.8581479382183816
          entropy_coeff: 0.009999999999999998
          kl: 0.6972862631082535
          policy_loss: 0.1573409596251117
          total_loss: 0.15626579903894
          vf_explained_var: 0.5743753910064697
          vf_loss: 0.00750632018963289
    num_agent_steps_sampled: 455000
    num_agent_steps_trained: 455000
    num_steps_sampled: 455000
    num_steps_trained: 455000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,455,6719.97,455000,-6.4397,-3.21,-10,643.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 456000
  custom_metrics: {}
  date: 2021-10-23_21-52-11
  done: false
  episode_len_mean: 649.39
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.493899999999908
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 575
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.076791142083796e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.5477941297822528
          entropy_coeff: 0.009999999999999998
          kl: 0.326490414266785
          policy_loss: -0.0728074366847674
          total_loss: -0.045749881366888685
          vf_explained_var: -0.6534948348999023
          vf_loss: 0.032535498346745344
    num_agent_steps_sampled: 456000
    num_agent_steps_trained: 456000
    num_steps_sampled: 456000
    num_steps_trained: 456000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,456,6734.31,456000,-6.4939,-3.21,-10,649.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 457000
  custom_metrics: {}
  date: 2021-10-23_21-52-26
  done: false
  episode_len_mean: 653.86
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.5385999999999065
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 576
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.115186713125694e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.30115601213441956
          entropy_coeff: 0.009999999999999998
          kl: 0.22178986970749165
          policy_loss: -0.1044151140583886
          total_loss: -0.06712549274994267
          vf_explained_var: -0.13973265886306763
          vf_loss: 0.040301183947465485
    num_agent_steps_sampled: 457000
    num_agent_steps_trained: 457000
    num_steps_sampled: 457000
    num_steps_trained: 45700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,457,6748.92,457000,-6.5386,-3.21,-10,653.86


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 458000
  custom_metrics: {}
  date: 2021-10-23_21-52-41
  done: false
  episode_len_mean: 659.07
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.590699999999905
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 577
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.172780069688542e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.2808352437284258
          entropy_coeff: 0.009999999999999998
          kl: 0.9158498977414435
          policy_loss: -0.06109080861012141
          total_loss: -0.037570596237977344
          vf_explained_var: -0.6080776453018188
          vf_loss: 0.026328568536943445
    num_agent_steps_sampled: 458000
    num_agent_steps_trained: 458000
    num_steps_sampled: 458000
    num_steps_trained: 458000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,458,6764.18,458000,-6.5907,-3.21,-10,659.07


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 459000
  custom_metrics: {}
  date: 2021-10-23_21-52-57
  done: false
  episode_len_mean: 662.45
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.624499999999905
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 578
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3759170104532813e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.41960622432331246
          entropy_coeff: 0.009999999999999998
          kl: 0.4059267600377401
          policy_loss: -0.05552760683414009
          total_loss: -0.00487872717074222
          vf_explained_var: -0.1029694452881813
          vf_loss: 0.05484494045894179
    num_agent_steps_sampled: 459000
    num_agent_steps_trained: 459000
    num_steps_sampled: 459000
    num_steps_trained: 459000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,459,6779.86,459000,-6.6245,-3.21,-10,662.45


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 460000
  custom_metrics: {}
  date: 2021-10-23_21-53-11
  done: false
  episode_len_mean: 665.08
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.6507999999999035
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 579
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.063875515679922e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.6666663572192192
          entropy_coeff: 0.009999999999999998
          kl: 2.3313527981854145
          policy_loss: -0.0017754695481724209
          total_loss: 0.03840996225674947
          vf_explained_var: -0.5100361704826355
          vf_loss: 0.04685210361559358
    num_agent_steps_sampled: 460000
    num_agent_steps_trained: 460000
    num_steps_sampled: 460000
    num_steps_trained: 460000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,460,6793.99,460000,-6.6508,-3.21,-10,665.08


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 461000
  custom_metrics: {}
  date: 2021-10-23_21-53-25
  done: false
  episode_len_mean: 668.89
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.6888999999999035
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 580
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0958132735198823e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.5193999278876517
          entropy_coeff: 0.009999999999999998
          kl: 0.7409388767141435
          policy_loss: -0.11896465840852923
          total_loss: -0.1036080397820721
          vf_explained_var: -0.48156866431236267
          vf_loss: 0.020550617975014677
    num_agent_steps_sampled: 461000
    num_agent_steps_trained: 461000
    num_steps_sampled: 461000
    num_steps_trained: 461000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,461,6807.6,461000,-6.6889,-3.21,-10,668.89


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 462000
  custom_metrics: {}
  date: 2021-10-23_21-53-41
  done: false
  episode_len_mean: 671.76
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.717599999999902
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 581
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.643719910279825e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.3501993520392312
          entropy_coeff: 0.009999999999999998
          kl: 0.5292101138581832
          policy_loss: 0.00565499323937628
          total_loss: 0.023843571709262
          vf_explained_var: -0.3071458041667938
          vf_loss: 0.02169056868620424
    num_agent_steps_sampled: 462000
    num_agent_steps_trained: 462000
    num_steps_sampled: 462000
    num_steps_trained: 462000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,462,6823.76,462000,-6.7176,-3.21,-10,671.76


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 463000
  custom_metrics: {}
  date: 2021-10-23_21-53-55
  done: false
  episode_len_mean: 675.23
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.752299999999902
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 582
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.965579865419735e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.5582605527506934
          entropy_coeff: 0.009999999999999998
          kl: 0.28588868187119565
          policy_loss: -0.034292434652646385
          total_loss: -0.017750555608007642
          vf_explained_var: -0.07817365974187851
          vf_loss: 0.022124479874037206
    num_agent_steps_sampled: 463000
    num_agent_steps_trained: 463000
    num_steps_sampled: 463000
    num_steps_trained: 4630

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,463,6837.71,463000,-6.7523,-3.21,-10,675.23


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 464000
  custom_metrics: {}
  date: 2021-10-23_21-54-08
  done: false
  episode_len_mean: 679.29
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.792899999999902
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 583
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0448369798129605e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.3350100106663174
          entropy_coeff: 0.009999999999999998
          kl: 0.3602960642427206
          policy_loss: -0.030113120170103178
          total_loss: -0.023090180713269445
          vf_explained_var: 0.08860648423433304
          vf_loss: 0.010373038018587976
    num_agent_steps_sampled: 464000
    num_agent_steps_trained: 464000
    num_steps_sampled: 464000
    num_steps_trained: 46400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,464,6851.08,464000,-6.7929,-3.21,-10,679.29


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 465000
  custom_metrics: {}
  date: 2021-10-23_21-54-23
  done: false
  episode_len_mean: 682.63
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.8262999999999
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 584
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5672554697194405e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.6398150455620554
          entropy_coeff: 0.009999999999999998
          kl: 0.5113634935683674
          policy_loss: -0.06760643066631424
          total_loss: -0.056892480618423884
          vf_explained_var: -0.6817702651023865
          vf_loss: 0.017112100720987657
    num_agent_steps_sampled: 465000
    num_agent_steps_trained: 465000
    num_steps_sampled: 465000
    num_steps_trained: 465000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,465,6865.74,465000,-6.8263,-3.21,-10,682.63


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 466000
  custom_metrics: {}
  date: 2021-10-23_21-54-37
  done: false
  episode_len_mean: 684.97
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.8496999999999
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 585
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3508832045791616e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.8030652953518762
          entropy_coeff: 0.009999999999999998
          kl: 0.5034198039935694
          policy_loss: -0.007146033106578721
          total_loss: 0.005177793030937513
          vf_explained_var: -0.6798866391181946
          vf_loss: 0.020354475022759288
    num_agent_steps_sampled: 466000
    num_agent_steps_trained: 466000
    num_steps_sampled: 466000
    num_steps_trained: 466000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,466,6879.35,466000,-6.8497,-3.21,-10,684.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 467000
  custom_metrics: {}
  date: 2021-10-23_21-54-50
  done: false
  episode_len_mean: 688.56
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.885599999999901
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 586
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5263248068687414e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.8144236079520649
          entropy_coeff: 0.009999999999999998
          kl: 2.3099384640653926
          policy_loss: -0.006800306836764017
          total_loss: 0.011253530780474344
          vf_explained_var: -0.3537053167819977
          vf_loss: 0.02619807333038706
    num_agent_steps_sampled: 467000
    num_agent_steps_trained: 467000
    num_steps_sampled: 467000
    num_steps_trained: 467000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,467,6892.67,467000,-6.8856,-3.21,-10,688.56


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 468000
  custom_metrics: {}
  date: 2021-10-23_21-55-01
  done: false
  episode_len_mean: 691.47
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.914699999999898
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 587
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.289487210303113e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.9090368039078183
          entropy_coeff: 0.009999999999999998
          kl: 1.638820884997646
          policy_loss: 0.08392866514623165
          total_loss: 0.08356277288662063
          vf_explained_var: -0.39162102341651917
          vf_loss: 0.008724474529865094
    num_agent_steps_sampled: 468000
    num_agent_steps_trained: 468000
    num_steps_sampled: 468000
    num_steps_trained: 468000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,468,6903.57,468000,-6.9147,-3.21,-10,691.47


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 469000
  custom_metrics: {}
  date: 2021-10-23_21-55-15
  done: false
  episode_len_mean: 694.5
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.944999999999897
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 588
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.934230815454667e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.3368603903386328
          entropy_coeff: 0.009999999999999998
          kl: 0.41396944550797343
          policy_loss: -0.10575340282585886
          total_loss: -0.0949927132162783
          vf_explained_var: -0.6721556186676025
          vf_loss: 0.014129294672360022
    num_agent_steps_sampled: 469000
    num_agent_steps_trained: 469000
    num_steps_sampled: 469000
    num_steps_trained: 469000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,469,6917.49,469000,-6.945,-3.21,-10,694.5


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 470000
  custom_metrics: {}
  date: 2021-10-23_21-55-30
  done: false
  episode_len_mean: 698.03
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -6.980299999999897
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 589
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1901346223182008e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.26074193778137367
          entropy_coeff: 0.009999999999999998
          kl: 0.16466795408891308
          policy_loss: -0.07410443708714512
          total_loss: -0.06513791022201379
          vf_explained_var: 0.02167881838977337
          vf_loss: 0.01157394524715427
    num_agent_steps_sampled: 470000
    num_agent_steps_trained: 470000
    num_steps_sampled: 470000
    num_steps_trained: 470000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,470,6932.96,470000,-6.9803,-3.21,-10,698.03


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 471000
  custom_metrics: {}
  date: 2021-10-23_21-55-44
  done: false
  episode_len_mean: 702.39
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.023899999999895
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 590
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7852019334773013e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.5983662465380297
          entropy_coeff: 0.009999999999999998
          kl: 0.3083339278275768
          policy_loss: -0.009365185846885045
          total_loss: -0.00046420610613293116
          vf_explained_var: -0.5902374982833862
          vf_loss: 0.014884640036163748
    num_agent_steps_sampled: 471000
    num_agent_steps_trained: 471000
    num_steps_sampled: 471000
    num_steps_trained: 471

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,471,6946.97,471000,-7.0239,-3.21,-10,702.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 472000
  custom_metrics: {}
  date: 2021-10-23_21-55-58
  done: false
  episode_len_mean: 706.61
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.066099999999895
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 591
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6778029002159513e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.4730607595708635
          entropy_coeff: 0.009999999999999998
          kl: 0.36661154329776763
          policy_loss: 0.031549133360385895
          total_loss: 0.043877939217620426
          vf_explained_var: 0.2514423429965973
          vf_loss: 0.0170594077240417
    num_agent_steps_sampled: 472000
    num_agent_steps_trained: 472000
    num_steps_sampled: 472000
    num_steps_trained: 472000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,472,6960.29,472000,-7.0661,-3.21,-10,706.61


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 473000
  custom_metrics: {}
  date: 2021-10-23_21-56-13
  done: false
  episode_len_mean: 710.3
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.102999999999894
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 592
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.016704350323927e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.40489203590485784
          entropy_coeff: 0.009999999999999998
          kl: 0.378385833133426
          policy_loss: -0.06752548722757233
          total_loss: -0.05248767847402228
          vf_explained_var: -0.31950652599334717
          vf_loss: 0.01908672890988075
    num_agent_steps_sampled: 473000
    num_agent_steps_trained: 473000
    num_steps_sampled: 473000
    num_steps_trained: 473000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,473,6975.16,473000,-7.103,-3.21,-10,710.3


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 474000
  custom_metrics: {}
  date: 2021-10-23_21-56-25
  done: false
  episode_len_mean: 714.21
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.142099999999893
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 593
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.02505652548589e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.5422557075818379
          entropy_coeff: 0.009999999999999998
          kl: 0.9351105567481782
          policy_loss: -0.1713510191275014
          total_loss: -0.16006539265314737
          vf_explained_var: 0.941481351852417
          vf_loss: 0.01670818493132376
    num_agent_steps_sampled: 474000
    num_agent_steps_trained: 474000
    num_steps_sampled: 474000
    num_steps_trained: 474000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,474,6987.96,474000,-7.1421,-3.21,-10,714.21


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 475000
  custom_metrics: {}
  date: 2021-10-23_21-56-39
  done: false
  episode_len_mean: 719.19
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.191899999999891
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 594
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.037584788228836e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.7369743085569805
          entropy_coeff: 0.009999999999999998
          kl: 2.021482616176622
          policy_loss: 0.19630030989646913
          total_loss: 0.21303025086720784
          vf_explained_var: -0.1673329919576645
          vf_loss: 0.024099680640049175
    num_agent_steps_sampled: 475000
    num_agent_steps_trained: 475000
    num_steps_sampled: 475000
    num_steps_trained: 475000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,475,7001.36,475000,-7.1919,-3.21,-10,719.19


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 476000
  custom_metrics: {}
  date: 2021-10-23_21-56-54
  done: false
  episode_len_mean: 724.04
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.240399999999892
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 595
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3556377182343254e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.226559292152524
          entropy_coeff: 0.009999999999999998
          kl: 0.15243466368152037
          policy_loss: -0.07009556591510772
          total_loss: -0.059119311885701284
          vf_explained_var: -0.668738067150116
          vf_loss: 0.01324184823832992
    num_agent_steps_sampled: 476000
    num_agent_steps_trained: 476000
    num_steps_sampled: 476000
    num_steps_trained: 476000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,476,7016.07,476000,-7.2404,-3.21,-10,724.04


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 477000
  custom_metrics: {}
  date: 2021-10-23_21-57-08
  done: false
  episode_len_mean: 728.83
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.288299999999889
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 596
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.033456577351488e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.3413675689034992
          entropy_coeff: 0.009999999999999998
          kl: 0.8439062516722414
          policy_loss: -0.03580164955928922
          total_loss: -0.025013462961133985
          vf_explained_var: -0.3934858441352844
          vf_loss: 0.01420186410476971
    num_agent_steps_sampled: 477000
    num_agent_steps_trained: 477000
    num_steps_sampled: 477000
    num_steps_trained: 477000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,477,7030.08,477000,-7.2883,-3.21,-10,728.83


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 478000
  custom_metrics: {}
  date: 2021-10-23_21-57-22
  done: false
  episode_len_mean: 732.01
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.320099999999888
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 597
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0501848660272324e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.5706845574908787
          entropy_coeff: 0.009999999999999998
          kl: 0.10320823902471198
          policy_loss: 0.026953658958276113
          total_loss: 0.04316432451208432
          vf_explained_var: 0.2673974335193634
          vf_loss: 0.021917511073924187
    num_agent_steps_sampled: 478000
    num_agent_steps_trained: 478000
    num_steps_sampled: 478000
    num_steps_trained: 478000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,478,7044.6,478000,-7.3201,-3.21,-10,732.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 479000
  custom_metrics: {}
  date: 2021-10-23_21-57-37
  done: false
  episode_len_mean: 735.66
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.3565999999998875
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 598
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.575277299040849e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.45439828038215635
          entropy_coeff: 0.009999999999999998
          kl: 0.7399037964642048
          policy_loss: 0.11309138735135396
          total_loss: 0.14391387667920855
          vf_explained_var: -0.3805278241634369
          vf_loss: 0.035366479124382344
    num_agent_steps_sampled: 479000
    num_agent_steps_trained: 479000
    num_steps_sampled: 479000
    num_steps_trained: 479000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,479,7059.24,479000,-7.3566,-3.21,-10,735.66


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 480000
  custom_metrics: {}
  date: 2021-10-23_21-57-51
  done: false
  episode_len_mean: 739.89
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.3988999999998875
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 599
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.8629159485612745e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.37020364469952055
          entropy_coeff: 0.009999999999999998
          kl: 0.4521474370629423
          policy_loss: -0.06700946850081285
          total_loss: -0.056141221564677025
          vf_explained_var: -0.682604193687439
          vf_loss: 0.014570279903192486
    num_agent_steps_sampled: 480000
    num_agent_steps_trained: 480000
    num_steps_sampled: 480000
    num_steps_trained: 48000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,480,7073.9,480000,-7.3989,-3.21,-10,739.89




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 481000
  custom_metrics: {}
  date: 2021-10-23_21-58-25
  done: false
  episode_len_mean: 744.08
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.440799999999886
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 600
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0294373922841911e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7598755672574043
          entropy_coeff: 0.009999999999999998
          kl: 0.4198024318036106
          policy_loss: -0.03606483671400282
          total_loss: -0.02996552363038063
          vf_explained_var: -0.7026253342628479
          vf_loss: 0.013698069722805586
    num_agent_steps_sampled: 481000
    num_agent_steps_trained: 481000
    num_steps_sampled: 481000
    num_steps_trained: 481000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,481,7106.99,481000,-7.4408,-3.21,-10,744.08


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 482000
  custom_metrics: {}
  date: 2021-10-23_21-58-39
  done: false
  episode_len_mean: 748.91
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.489099999999885
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 601
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.544156088426287e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.4302668931997485
          entropy_coeff: 0.009999999999999998
          kl: 0.942580075810353
          policy_loss: -0.03844034804238213
          total_loss: 0.25757156047556135
          vf_explained_var: 0.044443946331739426
          vf_loss: 0.3003145778551698
    num_agent_steps_sampled: 482000
    num_agent_steps_trained: 482000
    num_steps_sampled: 482000
    num_steps_trained: 482000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,482,7121.2,482000,-7.4891,-3.21,-10,748.91


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 483000
  custom_metrics: {}
  date: 2021-10-23_21-58-53
  done: false
  episode_len_mean: 752.87
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.528699999999884
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 602
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3162341326394298e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.27492404845025803
          entropy_coeff: 0.009999999999999998
          kl: 0.2670957509221302
          policy_loss: 0.11878001971377267
          total_loss: 0.1955372129049566
          vf_explained_var: -0.3830845355987549
          vf_loss: 0.07950643606923727
    num_agent_steps_sampled: 483000
    num_agent_steps_trained: 483000
    num_steps_sampled: 483000
    num_steps_trained: 483000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,483,7135.8,483000,-7.5287,-3.21,-10,752.87


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 484000
  custom_metrics: {}
  date: 2021-10-23_21-59-09
  done: false
  episode_len_mean: 756.54
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.565399999999884
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 603
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.474351198959144e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.762408876253499
          entropy_coeff: 0.009999999999999998
          kl: 2.470187911391258
          policy_loss: -0.07566258708635966
          total_loss: -0.04231341282526652
          vf_explained_var: -0.4443584978580475
          vf_loss: 0.040973268325130145
    num_agent_steps_sampled: 484000
    num_agent_steps_trained: 484000
    num_steps_sampled: 484000
    num_steps_trained: 484000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,484,7151.33,484000,-7.5654,-3.21,-10,756.54


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 485000
  custom_metrics: {}
  date: 2021-10-23_21-59-24
  done: false
  episode_len_mean: 760.85
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.608499999999881
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 604
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.211526798438716e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.7675643496215343
          entropy_coeff: 0.009999999999999998
          kl: 2.2447773627108996
          policy_loss: -0.07771487683057784
          total_loss: -0.061494039330217576
          vf_explained_var: -0.4821850657463074
          vf_loss: 0.023896481230945534
    num_agent_steps_sampled: 485000
    num_agent_steps_trained: 485000
    num_steps_sampled: 485000
    num_steps_trained: 485000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,485,7166.39,485000,-7.6085,-3.21,-10,760.85


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 486000
  custom_metrics: {}
  date: 2021-10-23_21-59-40
  done: false
  episode_len_mean: 766.95
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.669499999999882
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 605
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.817290197658075e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.43489049019085035
          entropy_coeff: 0.009999999999999998
          kl: 1.0929643448649182
          policy_loss: -0.04036759618255827
          total_loss: -0.018205599145342907
          vf_explained_var: -0.6462680101394653
          vf_loss: 0.026510903651877825
    num_agent_steps_sampled: 486000
    num_agent_steps_trained: 486000
    num_steps_sampled: 486000
    num_steps_trained: 486000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,486,7182.51,486000,-7.6695,-3.21,-10,766.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 487000
  custom_metrics: {}
  date: 2021-10-23_21-59-56
  done: false
  episode_len_mean: 771.91
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.719099999999881
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 606
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1725935296487114e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.33294334444734786
          entropy_coeff: 0.009999999999999998
          kl: 0.6233949452017744
          policy_loss: 0.1291757724351353
          total_loss: 0.14156638921962844
          vf_explained_var: -0.42210039496421814
          vf_loss: 0.015720052391083705
    num_agent_steps_sampled: 487000
    num_agent_steps_trained: 487000
    num_steps_sampled: 487000
    num_steps_trained: 487000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,487,7198.32,487000,-7.7191,-3.21,-10,771.91


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 488000
  custom_metrics: {}
  date: 2021-10-23_22-00-11
  done: false
  episode_len_mean: 775.84
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.75839999999988
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 607
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7588902944730673e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.5113030379017194
          entropy_coeff: 0.009999999999999998
          kl: 0.6914123991297351
          policy_loss: -0.09707319827543365
          total_loss: -0.09082315415143967
          vf_explained_var: -0.6133522987365723
          vf_loss: 0.01136307438201685
    num_agent_steps_sampled: 488000
    num_agent_steps_trained: 488000
    num_steps_sampled: 488000
    num_steps_trained: 488000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,488,7213.36,488000,-7.7584,-3.21,-10,775.84


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 489000
  custom_metrics: {}
  date: 2021-10-23_22-00-26
  done: false
  episode_len_mean: 779.42
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.79419999999988
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 608
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6383354417096007e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.36410891711711885
          entropy_coeff: 0.009999999999999998
          kl: 0.8362091352542241
          policy_loss: 0.052600625157356265
          total_loss: 0.05748185394331813
          vf_explained_var: -0.49496790766716003
          vf_loss: 0.008522317762900558
    num_agent_steps_sampled: 489000
    num_agent_steps_trained: 489000
    num_steps_sampled: 489000
    num_steps_trained: 489000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,489,7227.73,489000,-7.7942,-3.21,-10,779.42


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 490000
  custom_metrics: {}
  date: 2021-10-23_22-00-42
  done: false
  episode_len_mean: 784.45
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.84449999999988
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 609
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9575031625644e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.3494775086641312
          entropy_coeff: 0.009999999999999998
          kl: 1.5554932418796752
          policy_loss: -0.07856130103270213
          total_loss: -0.0733663197606802
          vf_explained_var: -0.2972714900970459
          vf_loss: 0.008689756189576453
    num_agent_steps_sampled: 490000
    num_agent_steps_trained: 490000
    num_steps_sampled: 490000
    num_steps_trained: 490000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,490,7243.78,490000,-7.8445,-3.21,-10,784.45


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 491000
  custom_metrics: {}
  date: 2021-10-23_22-00-57
  done: false
  episode_len_mean: 787.11
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.8710999999998785
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 610
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.9362547438466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.3814264408416218
          entropy_coeff: 0.009999999999999998
          kl: 0.5119706867469682
          policy_loss: 0.0437007659011417
          total_loss: 0.04440200361940596
          vf_explained_var: -0.48831015825271606
          vf_loss: 0.004515504658532639
    num_agent_steps_sampled: 491000
    num_agent_steps_trained: 491000
    num_steps_sampled: 491000
    num_steps_trained: 491000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,491,7259.34,491000,-7.8711,-3.21,-10,787.11


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 492000
  custom_metrics: {}
  date: 2021-10-23_22-01-12
  done: false
  episode_len_mean: 789.33
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.893299999999877
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 611
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.904382115769901e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.46419413313269614
          entropy_coeff: 0.009999999999999998
          kl: 0.3397605178049869
          policy_loss: -0.06053016136089961
          total_loss: -0.06130444788270527
          vf_explained_var: -0.5686880946159363
          vf_loss: 0.0038676576494860154
    num_agent_steps_sampled: 492000
    num_agent_steps_trained: 492000
    num_steps_sampled: 492000
    num_steps_trained: 492000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,492,7274.15,492000,-7.8933,-3.21,-10,789.33


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 493000
  custom_metrics: {}
  date: 2021-10-23_22-01-25
  done: false
  episode_len_mean: 791.17
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.911699999999877
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 612
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3356573173654851e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.3199300997787052
          entropy_coeff: 0.009999999999999998
          kl: 0.17982480180346302
          policy_loss: -0.03556462890572018
          total_loss: -0.02764749481446213
          vf_explained_var: -0.7364994287490845
          vf_loss: 0.011116434235656116
    num_agent_steps_sampled: 493000
    num_agent_steps_trained: 493000
    num_steps_sampled: 493000
    num_steps_trained: 493000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,493,7286.82,493000,-7.9117,-3.21,-10,791.17


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 494000
  custom_metrics: {}
  date: 2021-10-23_22-01-38
  done: false
  episode_len_mean: 792.71
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.927099999999877
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 613
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.003485976048228e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.1707357274989287
          entropy_coeff: 0.009999999999999998
          kl: 0.21761990510341195
          policy_loss: -0.04661627436677615
          total_loss: -0.0431569780740473
          vf_explained_var: -0.6755556464195251
          vf_loss: 0.0051666536349027105
    num_agent_steps_sampled: 494000
    num_agent_steps_trained: 494000
    num_steps_sampled: 494000
    num_steps_trained: 494000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,494,7300.38,494000,-7.9271,-3.21,-10,792.71


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 495000
  custom_metrics: {}
  date: 2021-10-23_22-01-54
  done: false
  episode_len_mean: 795.5
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.954999999999876
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 614
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0052289640723416e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.569036366045475
          entropy_coeff: 0.009999999999999998
          kl: 2.063972504809499
          policy_loss: -0.0841611026165386
          total_loss: -0.08276147829989593
          vf_explained_var: -0.455032080411911
          vf_loss: 0.007089985738275573
    num_agent_steps_sampled: 495000
    num_agent_steps_trained: 495000
    num_steps_sampled: 495000
    num_steps_trained: 495000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,495,7315.6,495000,-7.955,-3.21,-10,795.5


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 496000
  custom_metrics: {}
  date: 2021-10-23_22-02-10
  done: false
  episode_len_mean: 797.3
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -7.9729999999998755
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 615
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.5078434461085126e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.12736810089813339
          entropy_coeff: 0.009999999999999998
          kl: 1.0104589814113245
          policy_loss: -0.059010144571463266
          total_loss: -0.05357565436926153
          vf_explained_var: -0.31053993105888367
          vf_loss: 0.006708168242281923
    num_agent_steps_sampled: 496000
    num_agent_steps_trained: 496000
    num_steps_sampled: 496000
    num_steps_trained: 4960

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,496,7332.14,496000,-7.973,-3.21,-10,797.3


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 497000
  custom_metrics: {}
  date: 2021-10-23_22-02-25
  done: false
  episode_len_mean: 802.12
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -8.021199999999874
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 616
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.761765169162766e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.6377068604032199
          entropy_coeff: 0.009999999999999998
          kl: 1.5826459667748876
          policy_loss: -0.051003460151453815
          total_loss: -0.044687340615524186
          vf_explained_var: -0.1951819360256195
          vf_loss: 0.012693182245776471
    num_agent_steps_sampled: 497000
    num_agent_steps_trained: 497000
    num_steps_sampled: 497000
    num_steps_trained: 497000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,497,7346.84,497000,-8.0212,-3.21,-10,802.12


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 498000
  custom_metrics: {}
  date: 2021-10-23_22-02-40
  done: false
  episode_len_mean: 812.86
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -8.12859999999987
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 618
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0142647753744155e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.43007450766033595
          entropy_coeff: 0.009999999999999998
          kl: 0.3567064592407809
          policy_loss: -0.10000721779134539
          total_loss: -0.09887992805904812
          vf_explained_var: -0.3378029465675354
          vf_loss: 0.005428029506260322
    num_agent_steps_sampled: 498000
    num_agent_steps_trained: 498000
    num_steps_sampled: 498000
    num_steps_trained: 498000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,498,7362.31,498000,-8.1286,-3.21,-10,812.86


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 499000
  custom_metrics: {}
  date: 2021-10-23_22-02-57
  done: false
  episode_len_mean: 814.7
  episode_media: {}
  episode_reward_max: -3.2099999999999755
  episode_reward_mean: -8.146999999999872
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 619
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5213971630616235e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.42043102011084554
          entropy_coeff: 0.009999999999999998
          kl: 0.9400156516167852
          policy_loss: 0.06810688210858239
          total_loss: 0.07038563622368707
          vf_explained_var: -0.07578599452972412
          vf_loss: 0.006483053278902339
    num_agent_steps_sampled: 499000
    num_agent_steps_trained: 499000
    num_steps_sampled: 499000
    num_steps_trained: 499000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,499,7378.75,499000,-8.147,-3.21,-10,814.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 500000
  custom_metrics: {}
  date: 2021-10-23_22-03-12
  done: false
  episode_len_mean: 821.49
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -8.214899999999869
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 620
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2820957445924346e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.705392529649867
          entropy_coeff: 0.009999999999999998
          kl: 1.639702016611894
          policy_loss: 0.0106179502275255
          total_loss: 0.007370739512973361
          vf_explained_var: -0.21141578257083893
          vf_loss: 0.0038066795111970147
    num_agent_steps_sampled: 500000
    num_agent_steps_trained: 500000
    num_steps_sampled: 500000
    num_steps_trained: 500000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,500,7393.69,500000,-8.2149,-3.23,-10,821.49


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 501000
  custom_metrics: {}
  date: 2021-10-23_22-03-27
  done: false
  episode_len_mean: 821.95
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -8.21949999999987
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 621
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.423143616888653e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6686040530602138
          entropy_coeff: 0.009999999999999998
          kl: 1.9392502384053336
          policy_loss: 0.04530499180157979
          total_loss: 0.04313456482357449
          vf_explained_var: -0.30774325132369995
          vf_loss: 0.004515555546256817
    num_agent_steps_sampled: 501000
    num_agent_steps_trained: 501000
    num_steps_sampled: 501000
    num_steps_trained: 501000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,501,7408.39,501000,-8.2195,-3.23,-10,821.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 502000
  custom_metrics: {}
  date: 2021-10-23_22-03-43
  done: false
  episode_len_mean: 826.87
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -8.26869999999987
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 622
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.134715425332978e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9772100296285418
          entropy_coeff: 0.009999999999999998
          kl: 1.4703728324837155
          policy_loss: -0.006634742518266042
          total_loss: -0.007045517199569278
          vf_explained_var: -0.3570877015590668
          vf_loss: 0.009361251862719655
    num_agent_steps_sampled: 502000
    num_agent_steps_trained: 502000
    num_steps_sampled: 502000
    num_steps_trained: 502000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,502,7425.21,502000,-8.2687,-3.23,-10,826.87


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 503000
  custom_metrics: {}
  date: 2021-10-23_22-04-00
  done: false
  episode_len_mean: 832.25
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -8.322499999999868
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 623
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.702073137999464e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.6838961640993754
          entropy_coeff: 0.009999999999999998
          kl: 1.9043179614676369
          policy_loss: 0.05460741917292277
          total_loss: 0.05546809451447593
          vf_explained_var: -0.41804039478302
          vf_loss: 0.007699490373488516
    num_agent_steps_sampled: 503000
    num_agent_steps_trained: 503000
    num_steps_sampled: 503000
    num_steps_trained: 503000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,503,7441.8,503000,-8.3225,-3.23,-10,832.25


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 504000
  custom_metrics: {}
  date: 2021-10-23_22-04-15
  done: false
  episode_len_mean: 832.55
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -8.325499999999868
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 624
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.15531097069992e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.9873564364181624
          entropy_coeff: 0.009999999999999998
          kl: 1.0914600789546967
          policy_loss: 0.1901130747463968
          total_loss: 0.19301985634697807
          vf_explained_var: 0.07535043358802795
          vf_loss: 0.012780224831981792
    num_agent_steps_sampled: 504000
    num_agent_steps_trained: 504000
    num_steps_sampled: 504000
    num_steps_trained: 504000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,504,7456.68,504000,-8.3255,-3.23,-10,832.55


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 505000
  custom_metrics: {}
  date: 2021-10-23_22-04-30
  done: false
  episode_len_mean: 834.92
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -8.349199999999867
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 625
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7329664560498796e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.6796431124210358
          entropy_coeff: 0.009999999999999998
          kl: 1.2916848553551568
          policy_loss: 0.3081453283627828
          total_loss: 0.3075770240690973
          vf_explained_var: -0.9885995388031006
          vf_loss: 0.006227901380043477
    num_agent_steps_sampled: 505000
    num_agent_steps_trained: 505000
    num_steps_sampled: 505000
    num_steps_trained: 505000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,505,7471.81,505000,-8.3492,-3.23,-10,834.92


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 506000
  custom_metrics: {}
  date: 2021-10-23_22-04-43
  done: false
  episode_len_mean: 838.07
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -8.380699999999866
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 626
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.59944968407482e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0701325866911147
          entropy_coeff: 0.009999999999999998
          kl: 2.012980369064543
          policy_loss: 0.13972560270792908
          total_loss: 0.34596667674680553
          vf_explained_var: -0.5322007536888123
          vf_loss: 0.2169418803571413
    num_agent_steps_sampled: 506000
    num_agent_steps_trained: 506000
    num_steps_sampled: 506000
    num_steps_trained: 506000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,506,7485.02,506000,-8.3807,-3.23,-10,838.07


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 507000
  custom_metrics: {}
  date: 2021-10-23_22-04-56
  done: false
  episode_len_mean: 841.1
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -8.410999999999866
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 627
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.899174526112229e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8700029505623712
          entropy_coeff: 0.009999999999999998
          kl: 3.9394884831375547
          policy_loss: 0.13203873634338378
          total_loss: 0.16725670248270036
          vf_explained_var: -0.4275796711444855
          vf_loss: 0.04391646146670812
    num_agent_steps_sampled: 507000
    num_agent_steps_trained: 507000
    num_steps_sampled: 507000
    num_steps_trained: 507000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,507,7497.68,507000,-8.411,-3.23,-10,841.1


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 508000
  custom_metrics: {}
  date: 2021-10-23_22-05-09
  done: false
  episode_len_mean: 843.74
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -8.437399999999865
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 628
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.848761789168344e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.8938157068358528
          entropy_coeff: 0.009999999999999998
          kl: 0.18530895709991455
          policy_loss: 0.1127701733675268
          total_loss: 0.14723981213238505
          vf_explained_var: 0.0004014505248051137
          vf_loss: 0.04340768764183546
    num_agent_steps_sampled: 508000
    num_agent_steps_trained: 508000
    num_steps_sampled: 508000
    num_steps_trained: 508000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,508,7510.99,508000,-8.4374,-3.23,-10,843.74




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 509000
  custom_metrics: {}
  date: 2021-10-23_22-05-27
  done: false
  episode_len_mean: 843.86
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -8.438599999999864
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 629
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.773142683752515e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.26705274898558856
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.2726409996135367
          total_loss: .inf
          vf_explained_var: 0.10102447867393494
          vf_loss: 595.2355049981011
    num_agent_steps_sampled: 509000
    num_agent_steps_trained: 509000
    num_steps_sampled: 509000
    num_steps_trained: 509000
  iterations_since_restore: 509
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,509,7528.49,509000,-8.4386,-3.23,-10,843.86




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 510000
  custom_metrics: {}
  date: 2021-10-23_22-05-58
  done: false
  episode_len_mean: 846.51
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -8.465099999999863
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 630
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3159714025628773e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.013345432746832052
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.11342298264304797
          total_loss: .inf
          vf_explained_var: -0.6754924654960632
          vf_loss: 514.8863915019565
    num_agent_steps_sampled: 510000
    num_agent_steps_trained: 510000
    num_steps_sampled: 510000
    num_steps_trained: 510000
  iterations_since_restore: 510


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,510,7559.98,510000,-8.4651,-3.23,-10,846.51




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 511000
  custom_metrics: {}
  date: 2021-10-23_22-06-13
  done: false
  episode_len_mean: 848.16
  episode_media: {}
  episode_reward_max: -3.229999999999975
  episode_reward_mean: -8.481599999999863
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 631
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.973957103844316e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.017144602768692292
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.024799863580200407
          total_loss: .inf
          vf_explained_var: -0.07529346644878387
          vf_loss: 1506.8134351942274
    num_agent_steps_sampled: 511000
    num_agent_steps_trained: 511000
    num_steps_sampled: 511000
    num_steps_trained: 511000
  iterations_since_restore: 511

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,511,7574.71,511000,-8.4816,-3.23,-10,848.16


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 512000
  custom_metrics: {}
  date: 2021-10-23_22-06-49
  done: false
  episode_len_mean: 835.64
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.356399999999867
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 3
  episodes_total: 634
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9609356557664748e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.005945729797612229
          entropy_coeff: 0.009999999999999998
          kl: 7.30582325176945
          policy_loss: -0.04313322253939178
          total_loss: 1965.8497158474393
          vf_explained_var: 0.046724218875169754
          vf_loss: 1965.8928890652128
    num_agent_steps_sampled: 512000
    num_agent_steps_trained: 512000
    num_steps_sampled: 512000
    num_steps_trained: 512000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,512,7610.49,512000,-8.3564,-2,-10,835.64




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 513000
  custom_metrics: {}
  date: 2021-10-23_22-07-45
  done: false
  episode_len_mean: 805.7
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.05699999999987
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 5
  episodes_total: 639
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.44140348364971e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.005703512516467729
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.22739485618140962
          total_loss: .inf
          vf_explained_var: -0.06452753394842148
          vf_loss: 31334.75947265625
    num_agent_steps_sampled: 513000
    num_agent_steps_trained: 513000
    num_steps_sampled: 513000
    num_steps_trained: 513000
  iterations_since_restore: 513
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,513,7666.92,513000,-8.057,-2,-10,805.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 514000
  custom_metrics: {}
  date: 2021-10-23_22-07-59
  done: false
  episode_len_mean: 803.64
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.036399999999873
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 640
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.662105225474567e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.01798202421415426
          entropy_coeff: 0.009999999999999998
          kl: 0.2409456302722295
          policy_loss: -0.043352021090686324
          total_loss: 38853.14128689236
          vf_explained_var: -0.22727197408676147
          vf_loss: 38853.18432074653
    num_agent_steps_sampled: 514000
    num_agent_steps_trained: 514000
    num_steps_sampled: 514000
    num_steps_trained: 514000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,514,7680.06,514000,-8.0364,-2,-10,803.64




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 515000
  custom_metrics: {}
  date: 2021-10-23_22-08-14
  done: false
  episode_len_mean: 803.92
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.039199999999873
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 641
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.993157838211848e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.00014988010533322223
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.07910291718112097
          total_loss: .inf
          vf_explained_var: -0.1257116198539734
          vf_loss: 10899.632183159722
    num_agent_steps_sampled: 515000
    num_agent_steps_trained: 515000
    num_steps_sampled: 515000
    num_steps_trained: 515000
  iterations_since_restore: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,515,7695.24,515000,-8.0392,-2,-10,803.92




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 516000
  custom_metrics: {}
  date: 2021-10-23_22-08-29
  done: false
  episode_len_mean: 807.41
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.074099999999873
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 642
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4989736757317775e-05
          cur_lr: 5.000000000000001e-05
          entropy: 2.2624173123893082e-05
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.05677761352724499
          total_loss: .inf
          vf_explained_var: -0.07851375639438629
          vf_loss: 5272.039621310764
    num_agent_steps_sampled: 516000
    num_agent_steps_trained: 516000
    num_steps_sampled: 516000
    num_steps_trained: 516000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,516,7710.01,516000,-8.0741,-2,-10,807.41




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 517000
  custom_metrics: {}
  date: 2021-10-23_22-08-42
  done: false
  episode_len_mean: 810.01
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.100099999999873
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 643
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2484605135976664e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.0518538710448499e-11
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.04851250102122625
          total_loss: .inf
          vf_explained_var: -0.2898189425468445
          vf_loss: 3189.5114230685763
    num_agent_steps_sampled: 517000
    num_agent_steps_trained: 517000
    num_steps_sampled: 517000
    num_steps_trained: 517000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,517,7723.5,517000,-8.1001,-2,-10,810.01




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 518000
  custom_metrics: {}
  date: 2021-10-23_22-08-56
  done: false
  episode_len_mean: 810.01
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.100099999999873
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 644
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3726907703964986e-05
          cur_lr: 5.000000000000001e-05
          entropy: 7.527427362793549e-13
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03794055183728536
          total_loss: .inf
          vf_explained_var: -0.333333283662796
          vf_loss: 3167.2309848361547
    num_agent_steps_sampled: 518000
    num_agent_steps_trained: 518000
    num_steps_sampled: 518000
    num_steps_trained: 518000
  iterations_since_restore: 51

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,518,7737.58,518000,-8.1001,-2,-10,810.01




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 519000
  custom_metrics: {}
  date: 2021-10-23_22-09-10
  done: false
  episode_len_mean: 810.13
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.101299999999874
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 645
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.059036155594748e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.0163224909560023e-12
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03053549552957217
          total_loss: .inf
          vf_explained_var: -0.33333325386047363
          vf_loss: 4757.941338602702
    num_agent_steps_sampled: 519000
    num_agent_steps_trained: 519000
    num_steps_sampled: 519000
    num_steps_trained: 519000
  iterations_since_restore: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,519,7750.86,519000,-8.1013,-2,-10,810.13


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 520000
  custom_metrics: {}
  date: 2021-10-23_22-09-24
  done: false
  episode_len_mean: 812.1
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.120999999999874
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 646
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.588554233392122e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.448788977263587e-12
          entropy_coeff: 0.009999999999999998
          kl: -1.5166683437349473e-15
          policy_loss: -0.025436364114284515
          total_loss: 8710.123353407118
          vf_explained_var: -0.333333283662796
          vf_loss: 8710.148657565647
    num_agent_steps_sampled: 520000
    num_agent_steps_trained: 520000
    num_steps_sampled: 520000
    num_steps_trained: 520000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,520,7765.34,520000,-8.121,-2,-10,812.1


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 521000
  custom_metrics: {}
  date: 2021-10-23_22-09-38
  done: false
  episode_len_mean: 812.21
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.122099999999874
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 647
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.794277116696061e-05
          cur_lr: 5.000000000000001e-05
          entropy: 2.766081209316605e-06
          entropy_coeff: 0.009999999999999998
          kl: 3.4636918576292555e-07
          policy_loss: -0.022502274645699396
          total_loss: 15423.048091464572
          vf_explained_var: -0.3333331346511841
          vf_loss: 15423.071132914225
    num_agent_steps_sampled: 521000
    num_agent_steps_trained: 521000
    num_steps_sampled: 521000
    num_steps_trained: 5210

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,521,7779.53,521000,-8.1221,-2,-10,812.21


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 522000
  custom_metrics: {}
  date: 2021-10-23_22-09-52
  done: false
  episode_len_mean: 812.93
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.129299999999873
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 648
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8971385583480304e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.3221502698809714e-05
          entropy_coeff: 0.009999999999999998
          kl: 3.8087796343654074e-06
          policy_loss: -0.021136583387851716
          total_loss: 20755.440687052407
          vf_explained_var: -0.3333333730697632
          vf_loss: 20755.461177741156
    num_agent_steps_sampled: 522000
    num_agent_steps_trained: 522000
    num_steps_sampled: 522000
    num_steps_trained: 52

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,522,7792.87,522000,-8.1293,-2,-10,812.93


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 523000
  custom_metrics: {}
  date: 2021-10-23_22-10-06
  done: false
  episode_len_mean: 815.29
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.152899999999873
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 649
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.485692791740152e-06
          cur_lr: 5.000000000000001e-05
          entropy: 8.974642720977655e-06
          entropy_coeff: 0.009999999999999998
          kl: 2.110659528143774e-06
          policy_loss: -0.020352997134129205
          total_loss: 25965.407892523872
          vf_explained_var: -0.3333331346511841
          vf_loss: 25965.43191901313
    num_agent_steps_sampled: 523000
    num_agent_steps_trained: 523000
    num_steps_sampled: 523000
    num_steps_trained: 523000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,523,7807.34,523000,-8.1529,-2,-10,815.29


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 524000
  custom_metrics: {}
  date: 2021-10-23_22-10-20
  done: false
  episode_len_mean: 818.01
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.180099999999873
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 650
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.742846395870076e-06
          cur_lr: 5.000000000000001e-05
          entropy: 5.78724195271712e-06
          entropy_coeff: 0.009999999999999998
          kl: 1.0225576414546775e-06
          policy_loss: -0.019996407793627845
          total_loss: 27784.143784586588
          vf_explained_var: -0.3333333432674408
          vf_loss: 27784.162094455296
    num_agent_steps_sampled: 524000
    num_agent_steps_trained: 524000
    num_steps_sampled: 524000
    num_steps_trained: 52400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,524,7821.61,524000,-8.1801,-2,-10,818.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 525000
  custom_metrics: {}
  date: 2021-10-23_22-10-34
  done: false
  episode_len_mean: 821.8
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.217999999999872
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 651
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.371423197935038e-06
          cur_lr: 5.000000000000001e-05
          entropy: 5.791336925390726e-08
          entropy_coeff: 0.009999999999999998
          kl: 1.9488494336824165e-10
          policy_loss: -0.019819038609663647
          total_loss: 27594.65693020291
          vf_explained_var: -0.333333283662796
          vf_loss: 27594.67642449273
    num_agent_steps_sampled: 525000
    num_agent_steps_trained: 525000
    num_steps_sampled: 525000
    num_steps_trained: 525000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,525,7835.43,525000,-8.218,-2,-10,821.8


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 526000
  custom_metrics: {}
  date: 2021-10-23_22-10-48
  done: false
  episode_len_mean: 825.96
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.259599999999871
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 652
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.185711598967519e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.4222696067791833e-07
          entropy_coeff: 0.009999999999999998
          kl: 6.935809013539382e-10
          policy_loss: -0.019738761087258656
          total_loss: 25561.16637471517
          vf_explained_var: -0.3333333432674408
          vf_loss: 25561.180717976888
    num_agent_steps_sampled: 526000
    num_agent_steps_trained: 526000
    num_steps_sampled: 526000
    num_steps_trained: 52600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,526,7849.58,526000,-8.2596,-2,-10,825.96


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 527000
  custom_metrics: {}
  date: 2021-10-23_22-11-03
  done: false
  episode_len_mean: 831.69
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.316899999999869
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 653
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.928557994837595e-07
          cur_lr: 5.000000000000001e-05
          entropy: 2.936060084686905e-07
          entropy_coeff: 0.009999999999999998
          kl: 1.6591561754116432e-09
          policy_loss: -0.01961197331547737
          total_loss: 24744.091339450413
          vf_explained_var: -0.33333343267440796
          vf_loss: 24744.109073554144
    num_agent_steps_sampled: 527000
    num_agent_steps_trained: 527000
    num_steps_sampled: 527000
    num_steps_trained: 5270

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,527,7864.5,527000,-8.3169,-2,-10,831.69


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 528000
  custom_metrics: {}
  date: 2021-10-23_22-11-19
  done: false
  episode_len_mean: 834.96
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.34959999999987
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 654
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9642789974187975e-07
          cur_lr: 5.000000000000001e-05
          entropy: 5.424360507926767e-07
          entropy_coeff: 0.009999999999999998
          kl: 3.3226146673510156e-09
          policy_loss: -0.019561017553011577
          total_loss: 23016.18988477919
          vf_explained_var: -0.33333319425582886
          vf_loss: 23016.21104092068
    num_agent_steps_sampled: 528000
    num_agent_steps_trained: 528000
    num_steps_sampled: 528000
    num_steps_trained: 52800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,528,7880.02,528000,-8.3496,-2,-10,834.96


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 529000
  custom_metrics: {}
  date: 2021-10-23_22-11-33
  done: false
  episode_len_mean: 839.64
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.396399999999867
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 655
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4821394987093988e-07
          cur_lr: 5.000000000000001e-05
          entropy: 9.442666268060723e-07
          entropy_coeff: 0.009999999999999998
          kl: 5.7901351861270695e-09
          policy_loss: -0.019489053636789322
          total_loss: 21861.540621948243
          vf_explained_var: -0.3333333432674408
          vf_loss: 21861.559313964844
    num_agent_steps_sampled: 529000
    num_agent_steps_trained: 529000
    num_steps_sampled: 529000
    num_steps_trained: 529

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,529,7893.95,529000,-8.3964,-2,-10,839.64


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 530000
  custom_metrics: {}
  date: 2021-10-23_22-11-47
  done: false
  episode_len_mean: 844.68
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.446799999999866
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 656
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.410697493546994e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6109872378125247e-06
          entropy_coeff: 0.009999999999999998
          kl: 1.1669519750512335e-08
          policy_loss: -0.01948571453491847
          total_loss: 19673.367437744142
          vf_explained_var: -0.3333337604999542
          vf_loss: 19673.386909654404
    num_agent_steps_sampled: 530000
    num_agent_steps_trained: 530000
    num_steps_sampled: 530000
    num_steps_trained: 5300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,530,7908.01,530000,-8.4468,-2,-10,844.68


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 531000
  custom_metrics: {}
  date: 2021-10-23_22-12-02
  done: false
  episode_len_mean: 850.63
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.506299999999865
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 657
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.705348746773497e-08
          cur_lr: 5.000000000000001e-05
          entropy: 2.63905725470279e-06
          entropy_coeff: 0.009999999999999998
          kl: 1.7834512375556086e-08
          policy_loss: -0.01938822741309802
          total_loss: 19135.538226657445
          vf_explained_var: -0.33333367109298706
          vf_loss: 19135.555553860133
    num_agent_steps_sampled: 531000
    num_agent_steps_trained: 531000
    num_steps_sampled: 531000
    num_steps_trained: 53100

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,531,7923.34,531000,-8.5063,-2,-10,850.63


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 532000
  custom_metrics: {}
  date: 2021-10-23_22-12-18
  done: false
  episode_len_mean: 856.89
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.568899999999863
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 658
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8526743733867485e-08
          cur_lr: 5.000000000000001e-05
          entropy: 4.609352347273443e-06
          entropy_coeff: 0.009999999999999998
          kl: 1.9614891060528962e-07
          policy_loss: -0.019417819297975963
          total_loss: 16678.562965901692
          vf_explained_var: -0.33333346247673035
          vf_loss: 16678.58196648492
    num_agent_steps_sampled: 532000
    num_agent_steps_trained: 532000
    num_steps_sampled: 532000
    num_steps_trained: 532

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,532,7938.98,532000,-8.5689,-2,-10,856.89


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 533000
  custom_metrics: {}
  date: 2021-10-23_22-12-34
  done: false
  episode_len_mean: 862.6
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.625999999999863
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 659
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.263371866933742e-09
          cur_lr: 5.000000000000001e-05
          entropy: 7.880529070547481e-06
          entropy_coeff: 0.009999999999999998
          kl: 5.488559730161392e-07
          policy_loss: -0.019233293914132647
          total_loss: 17253.403594122992
          vf_explained_var: -0.3333335220813751
          vf_loss: 17253.423377821182
    num_agent_steps_sampled: 533000
    num_agent_steps_trained: 533000
    num_steps_sampled: 533000
    num_steps_trained: 533000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,533,7954.64,533000,-8.626,-2,-10,862.6




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 534000
  custom_metrics: {}
  date: 2021-10-23_22-13-06
  done: false
  episode_len_mean: 867.85
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.678499999999861
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 660
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.631685933466871e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.06527515562182909
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.06708604999714428
          total_loss: .inf
          vf_explained_var: 0.21979783475399017
          vf_loss: 52193.1775346544
    num_agent_steps_sampled: 534000
    num_agent_steps_trained: 534000
    num_steps_sampled: 534000
    num_steps_trained: 534000
  iterations_since_restore: 534
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,534,7986.53,534000,-8.6785,-2,-10,867.85




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 535000
  custom_metrics: {}
  date: 2021-10-23_22-13-23
  done: false
  episode_len_mean: 873.95
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.739499999999857
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 661
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.947528900200305e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.09623601314581709
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.1607398572895262
          total_loss: .inf
          vf_explained_var: 0.27364492416381836
          vf_loss: 710767.0980226304
    num_agent_steps_sampled: 535000
    num_agent_steps_trained: 535000
    num_steps_sampled: 535000
    num_steps_trained: 535000
  iterations_since_restore: 535
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,535,8003.74,535000,-8.7395,-2,-10,873.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 536000
  custom_metrics: {}
  date: 2021-10-23_22-13-37
  done: false
  episode_len_mean: 880.47
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.804699999999857
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 662
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0421293350300458e-08
          cur_lr: 5.000000000000001e-05
          entropy: 6.4549535783911875e-15
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.06637440125147502
          total_loss: 3976930.113888889
          vf_explained_var: 7.947286384535346e-09
          vf_loss: 3976930.136111111
    num_agent_steps_sampled: 536000
    num_agent_steps_trained: 536000
    num_steps_sampled: 536000
    num_steps_trained: 536000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,536,8017.91,536000,-8.8047,-2,-10,880.47


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 537000
  custom_metrics: {}
  date: 2021-10-23_22-13-50
  done: false
  episode_len_mean: 886.81
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.868099999999856
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 663
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.210646675150229e-09
          cur_lr: 5.000000000000001e-05
          entropy: 6.669646653274273e-24
          entropy_coeff: 0.009999999999999998
          kl: -2.641134809307311e-26
          policy_loss: -0.06629616146286328
          total_loss: 2172.617005750868
          vf_explained_var: 3.152423460051068e-07
          vf_loss: 2172.6832885742188
    num_agent_steps_sampled: 537000
    num_agent_steps_trained: 537000
    num_steps_sampled: 537000
    num_steps_trained: 5370

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,537,8031.18,537000,-8.8681,-2,-10,886.81


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 538000
  custom_metrics: {}
  date: 2021-10-23_22-14-01
  done: false
  episode_len_mean: 893.19
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.931899999999855
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 664
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6053233375751145e-09
          cur_lr: 5.000000000000001e-05
          entropy: 8.591204358007125e-24
          entropy_coeff: 0.009999999999999998
          kl: -4.05336841523113e-26
          policy_loss: -0.06629521027207375
          total_loss: 2077.8249484592016
          vf_explained_var: 1.986821445143505e-07
          vf_loss: 2077.891251627604
    num_agent_steps_sampled: 538000
    num_agent_steps_trained: 538000
    num_steps_sampled: 538000
    num_steps_trained: 5380

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,538,8041.98,538000,-8.9319,-2,-10,893.19


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 539000
  custom_metrics: {}
  date: 2021-10-23_22-14-15
  done: false
  episode_len_mean: 899.96
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.999599999999855
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 665
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3026616687875572e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6331093219610346e-23
          entropy_coeff: 0.009999999999999998
          kl: -7.777635804696871e-26
          policy_loss: -0.06629326567053795
          total_loss: 1996.4768500434027
          vf_explained_var: 1.3907749973895989e-07
          vf_loss: 1996.543149142795
    num_agent_steps_sampled: 539000
    num_agent_steps_trained: 539000
    num_steps_sampled: 539000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,539,8055.74,539000,-8.9996,-2,-10,899.96


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 540000
  custom_metrics: {}
  date: 2021-10-23_22-14-29
  done: false
  episode_len_mean: 906.26
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.062599999999854
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 666
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.513308343937786e-10
          cur_lr: 5.000000000000001e-05
          entropy: 3.106185817169678e-23
          entropy_coeff: 0.009999999999999998
          kl: -1.4956704703197304e-25
          policy_loss: -0.06629134093721707
          total_loss: 1916.2777113172742
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 1916.3439995659721
    num_agent_steps_sampled: 540000
    num_agent_steps_trained: 540000
    num_steps_sampled: 540000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,540,8070.07,540000,-9.0626,-2,-10,906.26


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 541000
  custom_metrics: {}
  date: 2021-10-23_22-14-43
  done: false
  episode_len_mean: 912.93
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.129299999999851
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 667
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.256654171968893e-10
          cur_lr: 5.000000000000001e-05
          entropy: 5.910551173070293e-23
          entropy_coeff: 0.009999999999999998
          kl: -2.8775892784533183e-25
          policy_loss: -0.06628964220484097
          total_loss: 1838.3572862413193
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 1838.4235880533854
    num_agent_steps_sampled: 541000
    num_agent_steps_trained: 541000
    num_steps_sampled: 541000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,541,8083.83,541000,-9.1293,-2,-10,912.93


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 542000
  custom_metrics: {}
  date: 2021-10-23_22-14-58
  done: false
  episode_len_mean: 918.96
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.18959999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 668
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6283270859844466e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.124862046870141e-22
          entropy_coeff: 0.009999999999999998
          kl: -5.537038696482784e-25
          policy_loss: -0.06628736481070518
          total_loss: 1762.7656778971354
          vf_explained_var: 0.0
          vf_loss: 1762.8319458007813
    num_agent_steps_sampled: 542000
    num_agent_steps_trained: 542000
    num_steps_sampled: 542000
    num_steps_trained: 542000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,542,8098.78,542000,-9.1896,-2,-10,918.96


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 543000
  custom_metrics: {}
  date: 2021-10-23_22-15-13
  done: false
  episode_len_mean: 918.96
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.18959999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 669
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.141635429922233e-11
          cur_lr: 5.000000000000001e-05
          entropy: 2.1404846143990315e-22
          entropy_coeff: 0.009999999999999998
          kl: -1.0651341794823821e-24
          policy_loss: -0.06628549098968506
          total_loss: 1689.5084459092882
          vf_explained_var: 2.7815499947791977e-07
          vf_loss: 1689.5747572157118
    num_agent_steps_sampled: 543000
    num_agent_steps_trained: 543000
    num_steps_sampled: 543000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,543,8113.91,543000,-9.1896,-2,-10,918.96


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 544000
  custom_metrics: {}
  date: 2021-10-23_22-15-27
  done: false
  episode_len_mean: 921.31
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.21309999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 670
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.0708177149611164e-11
          cur_lr: 5.000000000000001e-05
          entropy: 4.0711284611108e-22
          entropy_coeff: 0.009999999999999998
          kl: -2.047907353137587e-24
          policy_loss: -0.06628381585081418
          total_loss: 1618.494160970052
          vf_explained_var: -1.5894572413799324e-07
          vf_loss: 1618.56044921875
    num_agent_steps_sampled: 544000
    num_agent_steps_trained: 544000
    num_steps_sampled: 544000
    num_steps_trained: 544000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,544,8127.99,544000,-9.2131,-2,-10,921.31


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 545000
  custom_metrics: {}
  date: 2021-10-23_22-15-42
  done: false
  episode_len_mean: 922.9
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.228999999999848
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 671
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0354088574805582e-11
          cur_lr: 5.000000000000001e-05
          entropy: 7.737427367175398e-22
          entropy_coeff: 0.009999999999999998
          kl: -3.9345507452849456e-24
          policy_loss: -0.06628165890773137
          total_loss: 1549.7307915581598
          vf_explained_var: 8.278422569674149e-08
          vf_loss: 1549.7970865885416
    num_agent_steps_sampled: 545000
    num_agent_steps_trained: 545000
    num_steps_sampled: 545000
    num_steps_trained: 54

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,545,8142.15,545000,-9.229,-2,-10,922.9


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 546000
  custom_metrics: {}
  date: 2021-10-23_22-15-56
  done: false
  episode_len_mean: 922.9
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.22899999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 672
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0177044287402791e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.4690100752598336e-21
          entropy_coeff: 0.009999999999999998
          kl: -7.550274024157695e-24
          policy_loss: -0.06627884010473888
          total_loss: 1483.2147908528646
          vf_explained_var: 4.1060978617224464e-08
          vf_loss: 1483.2810777452257
    num_agent_steps_sampled: 546000
    num_agent_steps_trained: 546000
    num_steps_sampled: 546000
    num_steps_trained: 54

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,546,8156.58,546000,-9.229,-2,-10,922.9


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 547000
  custom_metrics: {}
  date: 2021-10-23_22-16-10
  done: false
  episode_len_mean: 925.57
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.25569999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 673
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.0885221437013955e-12
          cur_lr: 5.000000000000001e-05
          entropy: 2.785444213788398e-21
          entropy_coeff: 0.009999999999999998
          kl: -1.4471187025937348e-23
          policy_loss: -0.06627670551339786
          total_loss: 1418.8881808810763
          vf_explained_var: 1.1920928955078125e-07
          vf_loss: 1418.9544542100693
    num_agent_steps_sampled: 547000
    num_agent_steps_trained: 547000
    num_steps_sampled: 547000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,547,8170.47,547000,-9.2557,-2,-10,925.57


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 548000
  custom_metrics: {}
  date: 2021-10-23_22-16-25
  done: false
  episode_len_mean: 926.57
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.265699999999848
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 674
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5442610718506978e-12
          cur_lr: 5.000000000000001e-05
          entropy: 5.272984010680769e-21
          entropy_coeff: 0.009999999999999998
          kl: -2.768420673848972e-23
          policy_loss: -0.06627420832713445
          total_loss: 1356.7717258029513
          vf_explained_var: 1.5232298977707615e-08
          vf_loss: 1356.8379937065972
    num_agent_steps_sampled: 548000
    num_agent_steps_trained: 548000
    num_steps_sampled: 548000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,548,8185.32,548000,-9.2657,-2,-10,926.57


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 549000
  custom_metrics: {}
  date: 2021-10-23_22-16-39
  done: false
  episode_len_mean: 926.57
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.265699999999848
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 675
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2721305359253489e-12
          cur_lr: 5.000000000000001e-05
          entropy: 9.964073297127874e-21
          entropy_coeff: 0.009999999999999998
          kl: -5.2873228605508926e-23
          policy_loss: -0.06627227614323299
          total_loss: 1296.761299641927
          vf_explained_var: -2.6490953874969136e-09
          vf_loss: 1296.8275797526042
    num_agent_steps_sampled: 549000
    num_agent_steps_trained: 549000
    num_steps_sampled: 549000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,549,8199.46,549000,-9.2657,-2,-10,926.57


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 550000
  custom_metrics: {}
  date: 2021-10-23_22-16-53
  done: false
  episode_len_mean: 926.57
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.265699999999848
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 676
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.360652679626744e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.878842706045756e-20
          entropy_coeff: 0.009999999999999998
          kl: -1.0074837854400857e-22
          policy_loss: -0.06626990810036659
          total_loss: 1238.8833346896702
          vf_explained_var: 1.7219119285982742e-07
          vf_loss: 1238.9496134440103
    num_agent_steps_sampled: 550000
    num_agent_steps_trained: 550000
    num_steps_sampled: 550000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,550,8212.94,550000,-9.2657,-2,-10,926.57


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 551000
  custom_metrics: {}
  date: 2021-10-23_22-17-08
  done: false
  episode_len_mean: 926.69
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.266899999999849
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 677
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.180326339813372e-13
          cur_lr: 5.000000000000001e-05
          entropy: 3.5345526394470564e-20
          entropy_coeff: 0.009999999999999998
          kl: -1.9152747876939436e-22
          policy_loss: -0.06626671552658081
          total_loss: 1183.0596069335938
          vf_explained_var: 1.6821755366436264e-07
          vf_loss: 1183.1258850097656
    num_agent_steps_sampled: 551000
    num_agent_steps_trained: 551000
    num_steps_sampled: 551000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,551,8228.12,551000,-9.2669,-2,-10,926.69


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 552000
  custom_metrics: {}
  date: 2021-10-23_22-17-23
  done: false
  episode_len_mean: 926.77
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.267699999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 678
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.590163169906686e-13
          cur_lr: 5.000000000000001e-05
          entropy: 6.632108873980226e-20
          entropy_coeff: 0.009999999999999998
          kl: -3.631305473863843e-22
          policy_loss: -0.06626411030689876
          total_loss: 1129.2584228515625
          vf_explained_var: -5.298190774993827e-09
          vf_loss: 1129.324683295356
    num_agent_steps_sampled: 552000
    num_agent_steps_trained: 552000
    num_steps_sampled: 552000
    num_steps_trained: 552

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,552,8243.05,552000,-9.2677,-2,-10,926.77


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 553000
  custom_metrics: {}
  date: 2021-10-23_22-17-37
  done: false
  episode_len_mean: 927.53
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.275299999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 679
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.95081584953343e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2409603474980484e-19
          entropy_coeff: 0.009999999999999998
          kl: -6.865302067190563e-22
          policy_loss: -0.06626128032803535
          total_loss: 1077.4450927734374
          vf_explained_var: 4.238552619995062e-08
          vf_loss: 1077.5113538953992
    num_agent_steps_sampled: 553000
    num_agent_steps_trained: 553000
    num_steps_sampled: 553000
    num_steps_trained: 553

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,553,8257.51,553000,-9.2753,-2,-10,927.53


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 554000
  custom_metrics: {}
  date: 2021-10-23_22-17-52
  done: false
  episode_len_mean: 927.53
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.275299999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 680
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.975407924766715e-14
          cur_lr: 5.000000000000001e-05
          entropy: 2.3150068904332704e-19
          entropy_coeff: 0.009999999999999998
          kl: -1.2939685268464447e-21
          policy_loss: -0.06625803684194882
          total_loss: 1027.563325330946
          vf_explained_var: -1.8543667934523e-08
          vf_loss: 1027.6295844184028
    num_agent_steps_sampled: 554000
    num_agent_steps_trained: 554000
    num_steps_sampled: 554000
    num_steps_trained: 554

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,554,8271.95,554000,-9.2753,-2,-10,927.53


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 555000
  custom_metrics: {}
  date: 2021-10-23_22-18-06
  done: false
  episode_len_mean: 927.61
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.276099999999849
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 681
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9877039623833576e-14
          cur_lr: 5.000000000000001e-05
          entropy: 4.3050513449646444e-19
          entropy_coeff: 0.009999999999999998
          kl: -2.43119617203722e-21
          policy_loss: -0.06625545769929886
          total_loss: 979.5875169542101
          vf_explained_var: -3.2981236586238083e-07
          vf_loss: 979.6537807888454
    num_agent_steps_sampled: 555000
    num_agent_steps_trained: 555000
    num_steps_sampled: 555000
    num_steps_trained: 55

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,555,8286.26,555000,-9.2761,-2,-10,927.61


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 556000
  custom_metrics: {}
  date: 2021-10-23_22-18-20
  done: false
  episode_len_mean: 927.69
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.276899999999848
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 682
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.938519811916788e-15
          cur_lr: 5.000000000000001e-05
          entropy: 7.978439869377586e-19
          entropy_coeff: 0.009999999999999998
          kl: -4.551791183842187e-21
          policy_loss: -0.06625247746706009
          total_loss: 933.4624633789062
          vf_explained_var: 1.6027026106257836e-07
          vf_loss: 933.5287190755208
    num_agent_steps_sampled: 556000
    num_agent_steps_trained: 556000
    num_steps_sampled: 556000
    num_steps_trained: 5560

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,556,8300.64,556000,-9.2769,-2,-10,927.69


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 557000
  custom_metrics: {}
  date: 2021-10-23_22-18-34
  done: false
  episode_len_mean: 927.69
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.276899999999848
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 683
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.969259905958394e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.4733515767475184e-18
          entropy_coeff: 0.009999999999999998
          kl: -8.49132627795057e-21
          policy_loss: -0.0662495344877243
          total_loss: 889.1341946072049
          vf_explained_var: -2.7815499947791977e-07
          vf_loss: 889.200442843967
    num_agent_steps_sampled: 557000
    num_agent_steps_trained: 557000
    num_steps_sampled: 557000
    num_steps_trained: 55700

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,557,8314.42,557000,-9.2769,-2,-10,927.69


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 558000
  custom_metrics: {}
  date: 2021-10-23_22-18-48
  done: false
  episode_len_mean: 927.81
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.278099999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 684
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.484629952979197e-15
          cur_lr: 5.000000000000001e-05
          entropy: 2.7107078836686834e-18
          entropy_coeff: 0.009999999999999998
          kl: -1.5781583097738588e-20
          policy_loss: -0.06624597683548927
          total_loss: 846.5610371907552
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 846.6272827148438
    num_agent_steps_sampled: 558000
    num_agent_steps_trained: 558000
    num_steps_sampled: 558000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,558,8328.37,558000,-9.2781,-2,-10,927.81


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 559000
  custom_metrics: {}
  date: 2021-10-23_22-19-02
  done: false
  episode_len_mean: 927.81
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.278099999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 685
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2423149764895985e-15
          cur_lr: 5.000000000000001e-05
          entropy: 4.968158524880583e-18
          entropy_coeff: 0.009999999999999998
          kl: -2.921892270125384e-20
          policy_loss: -0.0662427085141341
          total_loss: 805.6990702311198
          vf_explained_var: 7.947286206899662e-08
          vf_loss: 805.7653143988715
    num_agent_steps_sampled: 559000
    num_agent_steps_trained: 559000
    num_steps_sampled: 559000
    num_steps_trained: 55900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,559,8342.62,559000,-9.2781,-2,-10,927.81


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 560000
  custom_metrics: {}
  date: 2021-10-23_22-19-16
  done: false
  episode_len_mean: 928.05
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.280499999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 686
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.211574882447993e-16
          cur_lr: 5.000000000000001e-05
          entropy: 9.069018467012533e-18
          entropy_coeff: 0.009999999999999998
          kl: -5.3874627531008766e-20
          policy_loss: -0.06623910119136174
          total_loss: 766.495051405165
          vf_explained_var: 5.165736070011917e-07
          vf_loss: 766.5612806532118
    num_agent_steps_sampled: 560000
    num_agent_steps_trained: 560000
    num_steps_sampled: 560000
    num_steps_trained: 56000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,560,8356.57,560000,-9.2805,-2,-10,928.05


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 561000
  custom_metrics: {}
  date: 2021-10-23_22-19-30
  done: false
  episode_len_mean: 929.11
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.291099999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 687
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1057874412239963e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.6487250094209285e-17
          entropy_coeff: 0.009999999999999998
          kl: -9.893610108722903e-20
          policy_loss: -0.0662356788913409
          total_loss: 728.8857008192274
          vf_explained_var: -2.4901495976337173e-07
          vf_loss: 728.9519327799479
    num_agent_steps_sampled: 561000
    num_agent_steps_trained: 561000
    num_steps_sampled: 561000
    num_steps_trained: 56

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,561,8370.13,561000,-9.2911,-2,-10,929.11


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 562000
  custom_metrics: {}
  date: 2021-10-23_22-19-45
  done: false
  episode_len_mean: 929.11
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.291099999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 688
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5528937206119981e-16
          cur_lr: 5.000000000000001e-05
          entropy: 2.984657482527367e-17
          entropy_coeff: 0.009999999999999998
          kl: -1.809143663787684e-19
          policy_loss: -0.06623177106181781
          total_loss: 692.8352478027343
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 692.9014736599393
    num_agent_steps_sampled: 562000
    num_agent_steps_trained: 562000
    num_steps_sampled: 562000
    num_steps_trained: 562

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,562,8384.69,562000,-9.2911,-2,-10,929.11


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 563000
  custom_metrics: {}
  date: 2021-10-23_22-19-59
  done: false
  episode_len_mean: 929.31
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.293099999999848
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 689
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.764468603059991e-17
          cur_lr: 5.000000000000001e-05
          entropy: 5.37981137839642e-17
          entropy_coeff: 0.009999999999999998
          kl: -3.2938847378518047e-19
          policy_loss: -0.06622811034321785
          total_loss: 658.2993442111546
          vf_explained_var: -1.5894572413799324e-07
          vf_loss: 658.3655741373698
    num_agent_steps_sampled: 563000
    num_agent_steps_trained: 563000
    num_steps_sampled: 563000
    num_steps_trained: 563

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,563,8399.16,563000,-9.2931,-2,-10,929.31




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 564000
  custom_metrics: {}
  date: 2021-10-23_22-20-40
  done: false
  episode_len_mean: 920.17
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.20169999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 691
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.8822343015299953e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.07967907762051457
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.2711214707957374
          total_loss: .inf
          vf_explained_var: 0.027314238250255585
          vf_loss: 80204174.14393039
    num_agent_steps_sampled: 564000
    num_agent_steps_trained: 564000
    num_steps_sampled: 564000
    num_steps_trained: 564000
  iterations_since_restore: 564
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,564,8440.25,564000,-9.2017,-2,-10,920.17




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 565000
  custom_metrics: {}
  date: 2021-10-23_22-20-54
  done: false
  episode_len_mean: 920.13
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.201299999999849
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 692
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.823351452294993e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.4762481451034546
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.13265754398372437
          total_loss: .inf
          vf_explained_var: -0.5375840663909912
          vf_loss: 1064490.148611111
    num_agent_steps_sampled: 565000
    num_agent_steps_trained: 565000
    num_steps_sampled: 565000
    num_steps_trained: 565000
  iterations_since_restore: 565
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,565,8454.12,565000,-9.2013,-2,-10,920.13


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 566000
  custom_metrics: {}
  date: 2021-10-23_22-21-09
  done: false
  episode_len_mean: 920.09
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.200899999999848
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 693
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.735027178442494e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.04424488132612573
          entropy_coeff: 0.009999999999999998
          kl: 4.74385953479343
          policy_loss: -0.2120033303896586
          total_loss: 9041949.567881944
          vf_explained_var: 0.16187678277492523
          vf_loss: 9041949.631597223
    num_agent_steps_sampled: 566000
    num_agent_steps_trained: 566000
    num_steps_sampled: 566000
    num_steps_trained: 566000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,566,8468.67,566000,-9.2009,-2,-10,920.09




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 567000
  custom_metrics: {}
  date: 2021-10-23_22-21-33
  done: false
  episode_len_mean: 912.09
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.12089999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 695
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3102540767663734e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.34130289262513386
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.14912581576241388
          total_loss: .inf
          vf_explained_var: -0.05748869478702545
          vf_loss: 21316339.55486111
    num_agent_steps_sampled: 567000
    num_agent_steps_trained: 567000
    num_steps_sampled: 567000
    num_steps_trained: 567000
  iterations_since_restore: 567


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,567,8492.56,567000,-9.1209,-2,-10,912.09




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 568000
  custom_metrics: {}
  date: 2021-10-23_22-21-56
  done: false
  episode_len_mean: 906.03
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.060299999999849
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 697
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.96538111514956e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.1741612162027094
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.10573468804359436
          total_loss: .inf
          vf_explained_var: -0.19011375308036804
          vf_loss: 28651880.559722222
    num_agent_steps_sampled: 568000
    num_agent_steps_trained: 568000
    num_steps_sampled: 568000
    num_steps_trained: 568000
  iterations_since_restore: 568
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,568,8516.39,568000,-9.0603,-2,-10,906.03




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 569000
  custom_metrics: {}
  date: 2021-10-23_22-22-55
  done: false
  episode_len_mean: 868.9
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.688999999999858
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 5
  episodes_total: 702
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.94807167272434e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.019872064102027152
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.23068995773792267
          total_loss: .inf
          vf_explained_var: -0.3333333134651184
          vf_loss: 207949.8412326389
    num_agent_steps_sampled: 569000
    num_agent_steps_trained: 569000
    num_steps_sampled: 569000
    num_steps_trained: 569000
  iterations_since_restore: 569
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,569,8574.74,569000,-8.689,-2,-10,868.9




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 570000
  custom_metrics: {}
  date: 2021-10-23_22-23-10
  done: false
  episode_len_mean: 867.66
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.676599999999858
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 703
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.422107509086511e-16
          cur_lr: 5.000000000000001e-05
          entropy: 4.3404986122520527e-23
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.12728055318196616
          total_loss: .inf
          vf_explained_var: 1.3907749973895989e-07
          vf_loss: 14573.601161024306
    num_agent_steps_sampled: 570000
    num_agent_steps_trained: 570000
    num_steps_sampled: 570000
    num_steps_trained: 570000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,570,8590,570000,-8.6766,-2,-10,867.66


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 571000
  custom_metrics: {}
  date: 2021-10-23_22-23-25
  done: false
  episode_len_mean: 867.7
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.67699999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 704
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.633161263629767e-16
          cur_lr: 5.000000000000001e-05
          entropy: 4.953847369283379e-23
          entropy_coeff: 0.009999999999999998
          kl: -8.778318866848709e-26
          policy_loss: 0.12726702292760214
          total_loss: 5960.4583116319445
          vf_explained_var: 7.947286206899662e-08
          vf_loss: 5960.331108940973
    num_agent_steps_sampled: 571000
    num_agent_steps_trained: 571000
    num_steps_sampled: 571000
    num_steps_trained: 571000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,571,8604.85,571000,-8.677,-2,-10,867.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 572000
  custom_metrics: {}
  date: 2021-10-23_22-23-41
  done: false
  episode_len_mean: 867.7
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.67699999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 705
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3165806318148836e-16
          cur_lr: 5.000000000000001e-05
          entropy: 5.483116459582556e-23
          entropy_coeff: 0.009999999999999998
          kl: -6.845652548504764e-26
          policy_loss: 0.12726746996243796
          total_loss: 5931.439887152777
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 5931.312662760417
    num_agent_steps_sampled: 572000
    num_agent_steps_trained: 572000
    num_steps_sampled: 572000
    num_steps_trained: 572000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,572,8620.29,572000,-8.677,-2,-10,867.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 573000
  custom_metrics: {}
  date: 2021-10-23_22-23-54
  done: false
  episode_len_mean: 867.7
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.676999999999861
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 706
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6582903159074418e-16
          cur_lr: 5.000000000000001e-05
          entropy: 6.091028327652276e-23
          entropy_coeff: 0.009999999999999998
          kl: -7.679491268987823e-26
          policy_loss: 0.12726779282093048
          total_loss: 5895.566531032986
          vf_explained_var: 9.934107225717526e-08
          vf_loss: 5895.439322916666
    num_agent_steps_sampled: 573000
    num_agent_steps_trained: 573000
    num_steps_sampled: 573000
    num_steps_trained: 573000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,573,8634,573000,-8.677,-2,-10,867.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 574000
  custom_metrics: {}
  date: 2021-10-23_22-24-09
  done: false
  episode_len_mean: 867.7
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.676999999999861
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 707
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.291451579537209e-17
          cur_lr: 5.000000000000001e-05
          entropy: 6.795794563759744e-23
          entropy_coeff: 0.009999999999999998
          kl: -8.818400718249214e-26
          policy_loss: 0.1272680958112081
          total_loss: 5856.555875651042
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 5856.428629557292
    num_agent_steps_sampled: 574000
    num_agent_steps_trained: 574000
    num_steps_sampled: 574000
    num_steps_trained: 574000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,574,8648.95,574000,-8.677,-2,-10,867.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 575000
  custom_metrics: {}
  date: 2021-10-23_22-24-22
  done: false
  episode_len_mean: 867.7
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.676999999999861
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 708
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.1457257897686045e-17
          cur_lr: 5.000000000000001e-05
          entropy: 7.616274732025763e-23
          entropy_coeff: 0.009999999999999998
          kl: -1.0174621034757848e-25
          policy_loss: 0.12726842612028122
          total_loss: 5816.166129557291
          vf_explained_var: -1.986821445143505e-07
          vf_loss: 5816.038926866319
    num_agent_steps_sampled: 575000
    num_agent_steps_trained: 575000
    num_steps_sampled: 575000
    num_steps_trained: 5750

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,575,8662.17,575000,-8.677,-2,-10,867.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 576000
  custom_metrics: {}
  date: 2021-10-23_22-24-37
  done: false
  episode_len_mean: 867.7
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.676999999999861
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 709
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0728628948843023e-17
          cur_lr: 5.000000000000001e-05
          entropy: 8.575642157966616e-23
          entropy_coeff: 0.009999999999999998
          kl: -1.1795614728211126e-25
          policy_loss: 0.1272687961657842
          total_loss: 5774.355588107639
          vf_explained_var: -1.1920928955078125e-07
          vf_loss: 5774.228233506945
    num_agent_steps_sampled: 576000
    num_agent_steps_trained: 576000
    num_steps_sampled: 576000
    num_steps_trained: 5760

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,576,8677.14,576000,-8.677,-2,-10,867.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 577000
  custom_metrics: {}
  date: 2021-10-23_22-24-52
  done: false
  episode_len_mean: 868.21
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.682099999999862
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 710
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0364314474421511e-17
          cur_lr: 5.000000000000001e-05
          entropy: 9.702465484843511e-23
          entropy_coeff: 0.009999999999999998
          kl: -1.3743234992093992e-25
          policy_loss: 0.12726930528879166
          total_loss: 5731.087603081597
          vf_explained_var: -2.384185791015625e-07
          vf_loss: 5730.960367838542
    num_agent_steps_sampled: 577000
    num_agent_steps_trained: 577000
    num_steps_sampled: 577000
    num_steps_trained: 577

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,577,8691.2,577000,-8.6821,-2,-10,868.21


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 578000
  custom_metrics: {}
  date: 2021-10-23_22-25-05
  done: false
  episode_len_mean: 868.37
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.68369999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 711
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.182157237210756e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.1032269363089571e-22
          entropy_coeff: 0.009999999999999998
          kl: -1.6095235798813292e-25
          policy_loss: 0.1272695536414782
          total_loss: 5686.313340928819
          vf_explained_var: 1.986821445143505e-07
          vf_loss: 5686.1860568576385
    num_agent_steps_sampled: 578000
    num_agent_steps_trained: 578000
    num_steps_sampled: 578000
    num_steps_trained: 57800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,578,8704.87,578000,-8.6837,-2,-10,868.37


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 579000
  custom_metrics: {}
  date: 2021-10-23_22-25-20
  done: false
  episode_len_mean: 868.37
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.68369999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 712
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.591078618605378e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.2609365428025282e-22
          entropy_coeff: 0.009999999999999998
          kl: -1.8951669657507627e-25
          policy_loss: 0.12727013230323792
          total_loss: 5640.045008680556
          vf_explained_var: 1.1920928955078125e-07
          vf_loss: 5639.91771375868
    num_agent_steps_sampled: 579000
    num_agent_steps_trained: 579000
    num_steps_sampled: 579000
    num_steps_trained: 57900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,579,8719.35,579000,-8.6837,-2,-10,868.37


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 580000
  custom_metrics: {}
  date: 2021-10-23_22-25-34
  done: false
  episode_len_mean: 868.37
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.68369999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 713
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.295539309302689e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.4489053291905036e-22
          entropy_coeff: 0.009999999999999998
          kl: -2.2436727645804285e-25
          policy_loss: 0.12727061410744986
          total_loss: 5592.210394965277
          vf_explained_var: 3.97364289028701e-07
          vf_loss: 5592.083175998264
    num_agent_steps_sampled: 580000
    num_agent_steps_trained: 580000
    num_steps_sampled: 580000
    num_steps_trained: 580000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,580,8733.73,580000,-8.6837,-2,-10,868.37


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 581000
  custom_metrics: {}
  date: 2021-10-23_22-25-48
  done: false
  episode_len_mean: 868.37
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.68369999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 714
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.477696546513445e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.6741068062411658e-22
          entropy_coeff: 0.009999999999999998
          kl: -2.670868847288253e-25
          policy_loss: 0.12727088977893194
          total_loss: 5542.795279947916
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 5542.668082682291
    num_agent_steps_sampled: 581000
    num_agent_steps_trained: 581000
    num_steps_sampled: 581000
    num_steps_trained: 581000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,581,8747.97,581000,-8.6837,-2,-10,868.37


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 582000
  custom_metrics: {}
  date: 2021-10-23_22-26-02
  done: false
  episode_len_mean: 868.41
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.68409999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 715
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2388482732567223e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.9453855786448176e-22
          entropy_coeff: 0.009999999999999998
          kl: -3.1984818024825794e-25
          policy_loss: 0.12727155784765878
          total_loss: 5491.776996527778
          vf_explained_var: 3.178914482759865e-07
          vf_loss: 5491.649745008681
    num_agent_steps_sampled: 582000
    num_agent_steps_trained: 582000
    num_steps_sampled: 582000
    num_steps_trained: 5820

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,582,8761.79,582000,-8.6841,-2,-10,868.41


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 583000
  custom_metrics: {}
  date: 2021-10-23_22-26-16
  done: false
  episode_len_mean: 868.41
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.68409999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 716
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6194241366283611e-19
          cur_lr: 5.000000000000001e-05
          entropy: 2.273977338281376e-22
          entropy_coeff: 0.009999999999999998
          kl: -3.852834473010415e-25
          policy_loss: 0.12727200984954834
          total_loss: 5439.119292534722
          vf_explained_var: 1.5894572413799324e-07
          vf_loss: 5438.992122395834
    num_agent_steps_sampled: 583000
    num_agent_steps_trained: 583000
    num_steps_sampled: 583000
    num_steps_trained: 58300

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,583,8775.48,583000,-8.6841,-2,-10,868.41


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 584000
  custom_metrics: {}
  date: 2021-10-23_22-26-31
  done: false
  episode_len_mean: 868.41
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.68409999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 717
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.097120683141806e-20
          cur_lr: 5.000000000000001e-05
          entropy: 2.6742771979130233e-22
          entropy_coeff: 0.009999999999999998
          kl: -4.669840360347003e-25
          policy_loss: 0.12727264811595282
          total_loss: 5384.7995822482635
          vf_explained_var: -3.5762786865234375e-07
          vf_loss: 5384.672265625
    num_agent_steps_sampled: 584000
    num_agent_steps_trained: 584000
    num_steps_sampled: 584000
    num_steps_trained: 584000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,584,8790.08,584000,-8.6841,-2,-10,868.41


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 585000
  custom_metrics: {}
  date: 2021-10-23_22-26-44
  done: false
  episode_len_mean: 868.45
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.684499999999861
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 718
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.048560341570903e-20
          cur_lr: 5.000000000000001e-05
          entropy: 3.1648159251364747e-22
          entropy_coeff: 0.009999999999999998
          kl: -5.695856255181422e-25
          policy_loss: 0.12727320939302444
          total_loss: 5328.774262152778
          vf_explained_var: -2.384185791015625e-07
          vf_loss: 5328.6469509548615
    num_agent_steps_sampled: 585000
    num_agent_steps_trained: 585000
    num_steps_sampled: 585000
    num_steps_trained: 585

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,585,8803.28,585000,-8.6845,-2,-10,868.45


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 586000
  custom_metrics: {}
  date: 2021-10-23_22-26-58
  done: false
  episode_len_mean: 868.45
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.684499999999861
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 719
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0242801707854514e-20
          cur_lr: 5.000000000000001e-05
          entropy: 3.7695652386178083e-22
          entropy_coeff: 0.009999999999999998
          kl: -6.991653806262957e-25
          policy_loss: 0.12727367877960205
          total_loss: 5271.010660807292
          vf_explained_var: 2.1855036891338386e-07
          vf_loss: 5270.883436414931
    num_agent_steps_sampled: 586000
    num_agent_steps_trained: 586000
    num_steps_sampled: 586000
    num_steps_trained: 586

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,586,8817.22,586000,-8.6845,-2,-10,868.45




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 587000
  custom_metrics: {}
  date: 2021-10-23_22-27-29
  done: false
  episode_len_mean: 867.08
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.670799999999861
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 720
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0121400853927257e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.5682238777478535
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.01649586856365204
          total_loss: .inf
          vf_explained_var: 0.0632905513048172
          vf_loss: 95893763.3871582
    num_agent_steps_sampled: 587000
    num_agent_steps_trained: 587000
    num_steps_sampled: 587000
    num_steps_trained: 587000
  iterations_since_restore: 587
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,587,8848.26,587000,-8.6708,-2,-10,867.08


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 588000
  custom_metrics: {}
  date: 2021-10-23_22-27-45
  done: false
  episode_len_mean: 867.08
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.670799999999861
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 721
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5182101280890886e-20
          cur_lr: 5.000000000000001e-05
          entropy: 6.884783389492671e-24
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.07894358038902283
          total_loss: 26180899.111111112
          vf_explained_var: 2.781550101360608e-08
          vf_loss: 26180899.244444445
    num_agent_steps_sampled: 588000
    num_agent_steps_trained: 588000
    num_steps_sampled: 588000
    num_steps_trained: 588000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,588,8864.23,588000,-8.6708,-2,-10,867.08


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 589000
  custom_metrics: {}
  date: 2021-10-23_22-27-59
  done: false
  episode_len_mean: 867.28
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.67279999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 722
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.591050640445443e-21
          cur_lr: 5.000000000000001e-05
          entropy: 1.4815640582858166e-27
          entropy_coeff: 0.009999999999999998
          kl: 1.66098367680427e-29
          policy_loss: -0.07891465723514557
          total_loss: 17726.468760850694
          vf_explained_var: -1.5894572413799324e-07
          vf_loss: 17726.54740668403
    num_agent_steps_sampled: 589000
    num_agent_steps_trained: 589000
    num_steps_sampled: 589000
    num_steps_trained: 5890

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,589,8878.2,589000,-8.6728,-2,-10,867.28


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 590000
  custom_metrics: {}
  date: 2021-10-23_22-28-13
  done: false
  episode_len_mean: 867.28
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.67279999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 723
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7955253202227214e-21
          cur_lr: 5.000000000000001e-05
          entropy: 1.671344907186204e-27
          entropy_coeff: 0.009999999999999998
          kl: -3.271160073143871e-30
          policy_loss: -0.07891532778739929
          total_loss: 15410.457411024305
          vf_explained_var: 3.5762786865234375e-07
          vf_loss: 15410.536284722222
    num_agent_steps_sampled: 590000
    num_agent_steps_trained: 590000
    num_steps_sampled: 590000
    num_steps_trained: 59

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,590,8891.88,590000,-8.6728,-2,-10,867.28


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 591000
  custom_metrics: {}
  date: 2021-10-23_22-28-27
  done: false
  episode_len_mean: 867.72
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.677199999999862
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 724
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8977626601113607e-21
          cur_lr: 5.000000000000001e-05
          entropy: 2.130357592160991e-27
          entropy_coeff: 0.009999999999999998
          kl: -4.221367994984958e-30
          policy_loss: -0.0789151390393575
          total_loss: 15207.451844618055
          vf_explained_var: 1.1920928955078125e-07
          vf_loss: 15207.53074001736
    num_agent_steps_sampled: 591000
    num_agent_steps_trained: 591000
    num_steps_sampled: 591000
    num_steps_trained: 591

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,591,8906.54,591000,-8.6772,-2,-10,867.72


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 592000
  custom_metrics: {}
  date: 2021-10-23_22-28-42
  done: false
  episode_len_mean: 867.8
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.67799999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 725
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.488813300556804e-22
          cur_lr: 5.000000000000001e-05
          entropy: 2.7362165484694413e-27
          entropy_coeff: 0.009999999999999998
          kl: -5.5634969120802505e-30
          policy_loss: -0.07891493539015453
          total_loss: 14993.843142361111
          vf_explained_var: 1.7881393432617188e-07
          vf_loss: 14993.921994357639
    num_agent_steps_sampled: 592000
    num_agent_steps_trained: 592000
    num_steps_sampled: 592000
    num_steps_trained: 59

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,592,8920.74,592000,-8.678,-2,-10,867.8


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 593000
  custom_metrics: {}
  date: 2021-10-23_22-28-55
  done: false
  episode_len_mean: 867.8
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.67799999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 726
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.744406650278402e-22
          cur_lr: 5.000000000000001e-05
          entropy: 3.5422913427760325e-27
          entropy_coeff: 0.009999999999999998
          kl: -7.3875683107666e-30
          policy_loss: -0.07891480624675751
          total_loss: 14775.625347222222
          vf_explained_var: -1.3907749973895989e-07
          vf_loss: 14775.704112413194
    num_agent_steps_sampled: 593000
    num_agent_steps_trained: 593000
    num_steps_sampled: 593000
    num_steps_trained: 5930

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,593,8934.41,593000,-8.678,-2,-10,867.8


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 594000
  custom_metrics: {}
  date: 2021-10-23_22-29-09
  done: false
  episode_len_mean: 867.8
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.67799999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 727
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.372203325139201e-22
          cur_lr: 5.000000000000001e-05
          entropy: 4.62326720794887e-27
          entropy_coeff: 0.009999999999999998
          kl: -9.897372884065399e-30
          policy_loss: -0.07891451319058736
          total_loss: 14552.878483072916
          vf_explained_var: 2.980232238769531e-07
          vf_loss: 14552.957356770834
    num_agent_steps_sampled: 594000
    num_agent_steps_trained: 594000
    num_steps_sampled: 594000
    num_steps_trained: 594000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,594,8948.52,594000,-8.678,-2,-10,867.8


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 595000
  custom_metrics: {}
  date: 2021-10-23_22-29-23
  done: false
  episode_len_mean: 867.96
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.679599999999862
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 728
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1861016625696004e-22
          cur_lr: 5.000000000000001e-05
          entropy: 6.085131693855722e-27
          entropy_coeff: 0.009999999999999998
          kl: -1.3356242763692675e-29
          policy_loss: -0.07891428470611572
          total_loss: 14326.877897135417
          vf_explained_var: -7.285012060265217e-08
          vf_loss: 14326.956705729166
    num_agent_steps_sampled: 595000
    num_agent_steps_trained: 595000
    num_steps_sampled: 595000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,595,8962.53,595000,-8.6796,-2,-10,867.96


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 596000
  custom_metrics: {}
  date: 2021-10-23_22-29-37
  done: false
  episode_len_mean: 870.8
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.707999999999862
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 729
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.930508312848002e-23
          cur_lr: 5.000000000000001e-05
          entropy: 8.07597781188014e-27
          entropy_coeff: 0.009999999999999998
          kl: -1.8160608278634618e-29
          policy_loss: -0.07891403635342915
          total_loss: 14096.857790798611
          vf_explained_var: 2.1855036891338386e-07
          vf_loss: 14096.93671875
    num_agent_steps_sampled: 596000
    num_agent_steps_trained: 596000
    num_steps_sampled: 596000
    num_steps_trained: 596000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,596,8976.23,596000,-8.708,-2,-10,870.8


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 597000
  custom_metrics: {}
  date: 2021-10-23_22-29-51
  done: false
  episode_len_mean: 872.15
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.72149999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 730
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.965254156424001e-23
          cur_lr: 5.000000000000001e-05
          entropy: 1.080826527280996e-26
          entropy_coeff: 0.009999999999999998
          kl: -2.4902649937883816e-29
          policy_loss: -0.07891373336315155
          total_loss: 13862.563899739584
          vf_explained_var: -1.1920928955078125e-07
          vf_loss: 13862.642708333333
    num_agent_steps_sampled: 597000
    num_agent_steps_trained: 597000
    num_steps_sampled: 597000
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,597,8990.41,597000,-8.7215,-2,-10,872.15


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 598000
  custom_metrics: {}
  date: 2021-10-23_22-30-06
  done: false
  episode_len_mean: 872.15
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.72149999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 731
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4826270782120005e-23
          cur_lr: 5.000000000000001e-05
          entropy: 1.4591575853782336e-26
          entropy_coeff: 0.009999999999999998
          kl: -3.444764369968445e-29
          policy_loss: -0.0789135346810023
          total_loss: 13624.56095920139
          vf_explained_var: 0.0
          vf_loss: 13624.639865451389
    num_agent_steps_sampled: 598000
    num_agent_steps_trained: 598000
    num_steps_sampled: 598000
    num_steps_trained: 598000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,598,9004.94,598000,-8.7215,-2,-10,872.15


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 599000
  custom_metrics: {}
  date: 2021-10-23_22-30-20
  done: false
  episode_len_mean: 872.15
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.72149999999986
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 732
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.413135391060003e-24
          cur_lr: 5.000000000000001e-05
          entropy: 1.9871680523418876e-26
          entropy_coeff: 0.009999999999999998
          kl: -4.8102139890789666e-29
          policy_loss: -0.07891333599885304
          total_loss: 13382.628374565973
          vf_explained_var: 5.828009719266447e-08
          vf_loss: 13382.707183159722
    num_agent_steps_sampled: 599000
    num_agent_steps_trained: 599000
    num_steps_sampled: 599000
    num_steps_trained: 59

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,599,9018.63,599000,-8.7215,-2,-10,872.15


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 600000
  custom_metrics: {}
  date: 2021-10-23_22-30-34
  done: false
  episode_len_mean: 880.15
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.801499999999859
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 733
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7065676955300014e-24
          cur_lr: 5.000000000000001e-05
          entropy: 2.7299928292449407e-26
          entropy_coeff: 0.009999999999999998
          kl: -6.770851706308506e-29
          policy_loss: -0.07891301314036052
          total_loss: 13137.395746527778
          vf_explained_var: 0.0
          vf_loss: 13137.47463107639
    num_agent_steps_sampled: 600000
    num_agent_steps_trained: 600000
    num_steps_sampled: 600000
    num_steps_trained: 600000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,600,9032.83,600000,-8.8015,-2,-10,880.15


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 601000
  custom_metrics: {}
  date: 2021-10-23_22-30-49
  done: false
  episode_len_mean: 888.15
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.881499999999857
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 734
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8532838477650007e-24
          cur_lr: 5.000000000000001e-05
          entropy: 3.7841618693987995e-26
          entropy_coeff: 0.009999999999999998
          kl: -9.616155040101483e-29
          policy_loss: -0.07891255617141724
          total_loss: 12889.097287326389
          vf_explained_var: 7.947286206899662e-08
          vf_loss: 12889.176085069445
    num_agent_steps_sampled: 601000
    num_agent_steps_trained: 601000
    num_steps_sampled: 601000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,601,9047.89,601000,-8.8815,-2,-10,888.15


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 602000
  custom_metrics: {}
  date: 2021-10-23_22-31-03
  done: false
  episode_len_mean: 896.12
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.961199999999856
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 735
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.266419238825003e-25
          cur_lr: 5.000000000000001e-05
          entropy: 5.292882684116855e-26
          entropy_coeff: 0.009999999999999998
          kl: -1.376228507090688e-28
          policy_loss: -0.07891226808230083
          total_loss: 12638.365152994791
          vf_explained_var: -1.7219120351796846e-08
          vf_loss: 12638.444032118055
    num_agent_steps_sampled: 602000
    num_agent_steps_trained: 602000
    num_steps_sampled: 602000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,602,9061.83,602000,-8.9612,-2,-10,896.12


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 603000
  custom_metrics: {}
  date: 2021-10-23_22-31-17
  done: false
  episode_len_mean: 904.09
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.040899999999853
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 736
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.633209619412502e-25
          cur_lr: 5.000000000000001e-05
          entropy: 7.470964046075924e-26
          entropy_coeff: 0.009999999999999998
          kl: -1.9896508903813937e-28
          policy_loss: -0.07891184091567993
          total_loss: 12384.415614149306
          vf_explained_var: 9.934107225717526e-08
          vf_loss: 12384.494325086805
    num_agent_steps_sampled: 603000
    num_agent_steps_trained: 603000
    num_steps_sampled: 603000
    num_steps_trained: 60

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,603,9075.58,603000,-9.0409,-2,-10,904.09


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 604000
  custom_metrics: {}
  date: 2021-10-23_22-31-32
  done: false
  episode_len_mean: 912.06
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.120599999999852
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 737
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.316604809706251e-25
          cur_lr: 5.000000000000001e-05
          entropy: 1.0639274063649385e-25
          entropy_coeff: 0.009999999999999998
          kl: -2.8999674984287007e-28
          policy_loss: -0.07891165713469188
          total_loss: 12127.763981119791
          vf_explained_var: -4.3710073782676773e-07
          vf_loss: 12127.843033854168
    num_agent_steps_sampled: 604000
    num_agent_steps_trained: 604000
    num_steps_sampled: 604000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,604,9090.58,604000,-9.1206,-2,-10,912.06


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 605000
  custom_metrics: {}
  date: 2021-10-23_22-31-45
  done: false
  episode_len_mean: 920.03
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.20029999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 738
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1583024048531254e-25
          cur_lr: 5.000000000000001e-05
          entropy: 1.5292891322808545e-25
          entropy_coeff: 0.009999999999999998
          kl: -4.264095989699092e-28
          policy_loss: -0.07891128957271576
          total_loss: 11869.106358506944
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 11869.185074869793
    num_agent_steps_sampled: 605000
    num_agent_steps_trained: 605000
    num_steps_sampled: 605000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,605,9104.22,605000,-9.2003,-2,-10,920.03


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 606000
  custom_metrics: {}
  date: 2021-10-23_22-31-59
  done: false
  episode_len_mean: 928.0
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.279999999999848
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 739
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.791512024265627e-26
          cur_lr: 5.000000000000001e-05
          entropy: 2.2183505198903095e-25
          entropy_coeff: 0.009999999999999998
          kl: -6.332111156267935e-28
          policy_loss: -0.07891097664833069
          total_loss: 11607.682373046875
          vf_explained_var: -8.742014756535355e-08
          vf_loss: 11607.761143663194
    num_agent_steps_sampled: 606000
    num_agent_steps_trained: 606000
    num_steps_sampled: 606000
    num_steps_trained: 60

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,606,9118.13,606000,-9.28,-2,-10,928


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 607000
  custom_metrics: {}
  date: 2021-10-23_22-32-14
  done: false
  episode_len_mean: 931.56
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.315599999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 740
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8957560121328136e-26
          cur_lr: 5.000000000000001e-05
          entropy: 3.248172045447087e-25
          entropy_coeff: 0.009999999999999998
          kl: -9.491810568079033e-28
          policy_loss: -0.07891047497590382
          total_loss: 11344.769346788195
          vf_explained_var: -5.960464477539063e-08
          vf_loss: 11344.848263888889
    num_agent_steps_sampled: 607000
    num_agent_steps_trained: 607000
    num_steps_sampled: 607000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,607,9132.38,607000,-9.3156,-2,-10,931.56


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 608000
  custom_metrics: {}
  date: 2021-10-23_22-32-28
  done: false
  episode_len_mean: 931.64
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.316399999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 741
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4478780060664068e-26
          cur_lr: 5.000000000000001e-05
          entropy: 4.799788207951248e-25
          entropy_coeff: 0.009999999999999998
          kl: -1.4321419942611125e-27
          policy_loss: -0.07891014715035756
          total_loss: 11081.129182942708
          vf_explained_var: -1.986821445143505e-07
          vf_loss: 11081.2080078125
    num_agent_steps_sampled: 608000
    num_agent_steps_trained: 608000
    num_steps_sampled: 608000
    num_steps_trained: 60

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,608,9146.41,608000,-9.3164,-2,-10,931.64


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 609000
  custom_metrics: {}
  date: 2021-10-23_22-32-39
  done: false
  episode_len_mean: 931.64
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.316399999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 742
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.239390030332034e-27
          cur_lr: 5.000000000000001e-05
          entropy: 7.158016053941482e-25
          entropy_coeff: 0.009999999999999998
          kl: -2.1856342093065956e-27
          policy_loss: -0.07890976468722026
          total_loss: 10814.678944227431
          vf_explained_var: -4.17232506322307e-08
          vf_loss: 10814.757660590278
    num_agent_steps_sampled: 609000
    num_agent_steps_trained: 609000
    num_steps_sampled: 609000
    num_steps_trained: 60

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,609,9158.02,609000,-9.3164,-2,-10,931.64


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 610000
  custom_metrics: {}
  date: 2021-10-23_22-32-52
  done: false
  episode_len_mean: 931.64
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.316399999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 743
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.619695015166017e-27
          cur_lr: 5.000000000000001e-05
          entropy: 1.0777044643572505e-24
          entropy_coeff: 0.009999999999999998
          kl: -3.3619409398486084e-27
          policy_loss: -0.0789093275864919
          total_loss: 10548.35398220486
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 10548.432958984375
    num_agent_steps_sampled: 610000
    num_agent_steps_trained: 610000
    num_steps_sampled: 610000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,610,9171.09,610000,-9.3164,-2,-10,931.64


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 611000
  custom_metrics: {}
  date: 2021-10-23_22-33-06
  done: false
  episode_len_mean: 931.64
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.316399999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 744
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8098475075830085e-27
          cur_lr: 5.000000000000001e-05
          entropy: 1.6376804358600336e-24
          entropy_coeff: 0.009999999999999998
          kl: -5.22558677419747e-27
          policy_loss: -0.07890883088111877
          total_loss: 10280.457373046875
          vf_explained_var: 1.1920928955078125e-07
          vf_loss: 10280.536284722222
    num_agent_steps_sampled: 611000
    num_agent_steps_trained: 611000
    num_steps_sampled: 611000
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,611,9184.66,611000,-9.3164,-2,-10,931.64


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 612000
  custom_metrics: {}
  date: 2021-10-23_22-33-20
  done: false
  episode_len_mean: 931.64
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.316399999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 745
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.049237537915042e-28
          cur_lr: 5.000000000000001e-05
          entropy: 2.5113878903962134e-24
          entropy_coeff: 0.009999999999999998
          kl: -8.186805531846945e-27
          policy_loss: -0.07890829940636952
          total_loss: 10012.210671657986
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 10012.289463975694
    num_agent_steps_sampled: 612000
    num_agent_steps_trained: 612000
    num_steps_sampled: 612000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,612,9198.94,612000,-9.3164,-2,-10,931.64


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 613000
  custom_metrics: {}
  date: 2021-10-23_22-33-35
  done: false
  episode_len_mean: 931.64
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.316399999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 746
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.524618768957521e-28
          cur_lr: 5.000000000000001e-05
          entropy: 3.886752028342507e-24
          entropy_coeff: 0.009999999999999998
          kl: -1.295285886191722e-26
          policy_loss: -0.07890773316224416
          total_loss: 9743.15785047743
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 9743.23671875
    num_agent_steps_sampled: 613000
    num_agent_steps_trained: 613000
    num_steps_sampled: 613000
    num_steps_trained: 613000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,613,9213.61,613000,-9.3164,-2,-10,931.64


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 614000
  custom_metrics: {}
  date: 2021-10-23_22-33-49
  done: false
  episode_len_mean: 931.64
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.316399999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 747
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2623093844787606e-28
          cur_lr: 5.000000000000001e-05
          entropy: 6.0732513699424354e-24
          entropy_coeff: 0.009999999999999998
          kl: -2.0686540819513248e-26
          policy_loss: -0.07890724142392476
          total_loss: 9474.753634982639
          vf_explained_var: -2.291467495751931e-07
          vf_loss: 9474.83255750868
    num_agent_steps_sampled: 614000
    num_agent_steps_trained: 614000
    num_steps_sampled: 614000
    num_steps_trained: 61

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,614,9227.44,614000,-9.3164,-2,-10,931.64


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 615000
  custom_metrics: {}
  date: 2021-10-23_22-34-04
  done: false
  episode_len_mean: 931.64
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.316399999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 748
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1311546922393803e-28
          cur_lr: 5.000000000000001e-05
          entropy: 9.57373207485257e-24
          entropy_coeff: 0.009999999999999998
          kl: -3.326551983005279e-26
          policy_loss: -0.0789068192243576
          total_loss: 9206.81826171875
          vf_explained_var: -7.351239617037209e-08
          vf_loss: 9206.89712999132
    num_agent_steps_sampled: 615000
    num_agent_steps_trained: 615000
    num_steps_sampled: 615000
    num_steps_trained: 615000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,615,9242.12,615000,-9.3164,-2,-10,931.64


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 616000
  custom_metrics: {}
  date: 2021-10-23_22-34-19
  done: false
  episode_len_mean: 931.64
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.316399999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 749
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.655773461196902e-29
          cur_lr: 5.000000000000001e-05
          entropy: 1.5234946721185233e-23
          entropy_coeff: 0.009999999999999998
          kl: -5.413331250593813e-26
          policy_loss: -0.07890644172827403
          total_loss: 8938.436968315973
          vf_explained_var: 0.0
          vf_loss: 8938.515852864582
    num_agent_steps_sampled: 616000
    num_agent_steps_trained: 616000
    num_steps_sampled: 616000
    num_steps_trained: 616000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,616,9257.27,616000,-9.3164,-2,-10,931.64




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 617000
  custom_metrics: {}
  date: 2021-10-23_22-34-49
  done: false
  episode_len_mean: 930.25
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.302499999999846
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 750
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.827886730598451e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.23208458906030408
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.23896840429968305
          total_loss: .inf
          vf_explained_var: 0.12646761536598206
          vf_loss: 61951292.99080946
    num_agent_steps_sampled: 617000
    num_agent_steps_trained: 617000
    num_steps_sampled: 617000
    num_steps_trained: 617000
  iterations_since_restore: 617
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,617,9287.58,617000,-9.3025,-2,-10,930.25


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 618000
  custom_metrics: {}
  date: 2021-10-23_22-35-05
  done: false
  episode_len_mean: 930.13
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.301299999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 751
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.241830095897677e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.05620790083711553
          entropy_coeff: 0.009999999999999998
          kl: 0.3129253405663702
          policy_loss: 0.07516504190862179
          total_loss: 557970277.3333334
          vf_explained_var: 0.10791795700788498
          vf_loss: 557970276.9777777
    num_agent_steps_sampled: 618000
    num_agent_steps_trained: 618000
    num_steps_sampled: 618000
    num_steps_trained: 618000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,618,9303.58,618000,-9.3013,-2,-10,930.13


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 619000
  custom_metrics: {}
  date: 2021-10-23_22-35-20
  done: false
  episode_len_mean: 930.01
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.300099999999848
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 752
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.362745143846515e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.16663684050242106
          entropy_coeff: 0.009999999999999998
          kl: 0.8584359195497301
          policy_loss: 0.0327728776468171
          total_loss: 3215330245.1555557
          vf_explained_var: 0.018884878605604172
          vf_loss: 3215330245.1555557
    num_agent_steps_sampled: 619000
    num_agent_steps_trained: 619000
    num_steps_sampled: 619000
    num_steps_trained: 619000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,619,9318.54,619000,-9.3001,-2,-10,930.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 620000
  custom_metrics: {}
  date: 2021-10-23_22-35-38
  done: false
  episode_len_mean: 927.96
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.279599999999846
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 753
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.544117715769774e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.33474242157406275
          entropy_coeff: 0.009999999999999998
          kl: 0.16168010085821152
          policy_loss: -0.004754762434297137
          total_loss: 1250219531.0222223
          vf_explained_var: 0.2992425560951233
          vf_loss: 1250219531.0222223
    num_agent_steps_sampled: 620000
    num_agent_steps_trained: 620000
    num_steps_sampled: 620000
    num_steps_trained: 620000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,620,9336.11,620000,-9.2796,-2,-10,927.96




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 621000
  custom_metrics: {}
  date: 2021-10-23_22-35-52
  done: false
  episode_len_mean: 927.09
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.270899999999846
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 754
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4316176573654656e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.09103917572647333
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.01792789896329244
          total_loss: .inf
          vf_explained_var: 0.274446040391922
          vf_loss: 671676059.3777778
    num_agent_steps_sampled: 621000
    num_agent_steps_trained: 621000
    num_steps_sampled: 621000
    num_steps_trained: 621000
  iterations_since_restore: 621
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,621,9350.51,621000,-9.2709,-2,-10,927.09




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 622000
  custom_metrics: {}
  date: 2021-10-23_22-36-07
  done: false
  episode_len_mean: 926.58
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.265799999999846
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 755
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1474264860481994e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.1121712002489302
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.05766180223888821
          total_loss: .inf
          vf_explained_var: -0.6666666865348816
          vf_loss: 153655365.7777778
    num_agent_steps_sampled: 622000
    num_agent_steps_trained: 622000
    num_steps_sampled: 622000
    num_steps_trained: 622000
  iterations_since_restore: 622
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,622,9365.44,622000,-9.2658,-2,-10,926.58


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 623000
  custom_metrics: {}
  date: 2021-10-23_22-36-23
  done: false
  episode_len_mean: 924.22
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.242199999999848
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 757
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.221139729072299e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.25166446566581724
          entropy_coeff: 0.009999999999999998
          kl: 0.01894003645413452
          policy_loss: 0.12203414506382412
          total_loss: 12048637917.155556
          vf_explained_var: 0.194072887301445
          vf_loss: 12048637917.155556
    num_agent_steps_sampled: 623000
    num_agent_steps_trained: 623000
    num_steps_sampled: 623000
    num_steps_trained: 623000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,623,9381.86,623000,-9.2422,-2,-10,924.22




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 624000
  custom_metrics: {}
  date: 2021-10-23_22-36-46
  done: false
  episode_len_mean: 920.69
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.206899999999848
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 758
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.221139729072299e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.642106130388048
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.030505328708224828
          total_loss: .inf
          vf_explained_var: -0.6666666865348816
          vf_loss: 295069639.35555553
    num_agent_steps_sampled: 624000
    num_agent_steps_trained: 624000
    num_steps_sampled: 624000
    num_steps_trained: 624000
  iterations_since_restore: 624
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,624,9403.86,624000,-9.2069,-2,-10,920.69




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 625000
  custom_metrics: {}
  date: 2021-10-23_22-37-03
  done: false
  episode_len_mean: 916.69
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.166899999999847
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 759
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.831709593608448e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.4154802531003952
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.045214816182851794
          total_loss: .inf
          vf_explained_var: -0.6599128842353821
          vf_loss: 367161155.3666667
    num_agent_steps_sampled: 625000
    num_agent_steps_trained: 625000
    num_steps_sampled: 625000
    num_steps_trained: 625000
  iterations_since_restore: 625
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,625,9421.03,625000,-9.1669,-2,-10,916.69


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 626000
  custom_metrics: {}
  date: 2021-10-23_22-37-18
  done: false
  episode_len_mean: 915.78
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.157799999999849
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 2
  episodes_total: 761
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.247564390412672e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.12142333487669627
          entropy_coeff: 0.009999999999999998
          kl: 0.004765701997611258
          policy_loss: 0.08144336338672373
          total_loss: 1027354179.5555556
          vf_explained_var: -0.05552535504102707
          vf_loss: 1027354179.5555556
    num_agent_steps_sampled: 626000
    num_agent_steps_trained: 626000
    num_steps_sampled: 626000
    num_steps_trained: 626000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,626,9436.45,626000,-9.1578,-2,-10,915.78


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 627000
  custom_metrics: {}
  date: 2021-10-23_22-37-32
  done: false
  episode_len_mean: 915.42
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.15419999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 762
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.623782195206336e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.016429921487967174
          entropy_coeff: 0.009999999999999998
          kl: 0.010624407976865768
          policy_loss: 0.04621526331951221
          total_loss: 10710573568.0
          vf_explained_var: 0.059931036084890366
          vf_loss: 10710573568.0
    num_agent_steps_sampled: 627000
    num_agent_steps_trained: 627000
    num_steps_sampled: 627000
    num_steps_trained: 627000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,627,9450.13,627000,-9.1542,-2,-10,915.42


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 628000
  custom_metrics: {}
  date: 2021-10-23_22-37-46
  done: false
  episode_len_mean: 915.42
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.15419999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 763
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.623782195206336e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.10897870610157649
          total_loss: 5688203.294444445
          vf_explained_var: 1.3907749973895989e-07
          vf_loss: 5688203.294444445
    num_agent_steps_sampled: 628000
    num_agent_steps_trained: 628000
    num_steps_sampled: 628000
    num_steps_trained: 628000
  iterations_since_restore: 628
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,628,9464.34,628000,-9.1542,-2,-10,915.42


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 629000
  custom_metrics: {}
  date: 2021-10-23_22-38-03
  done: false
  episode_len_mean: 915.42
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.15419999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 764
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.811891097603168e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.05052417268355688
          total_loss: 27570764187056.355
          vf_explained_var: -2.5828680350059585e-07
          vf_loss: 27570764187056.355
    num_agent_steps_sampled: 629000
    num_agent_steps_trained: 629000
    num_steps_sampled: 629000
    num_steps_trained: 629000
  iterations_since_restore: 629
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,629,9481.58,629000,-9.1542,-2,-10,915.42




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 630000
  custom_metrics: {}
  date: 2021-10-23_22-38-18
  done: false
  episode_len_mean: 912.86
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.128599999999851
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 765
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.05945548801584e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.07870033395787081
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.14887995090749528
          total_loss: .inf
          vf_explained_var: -0.5797039270401001
          vf_loss: 263077174.75555557
    num_agent_steps_sampled: 630000
    num_agent_steps_trained: 630000
    num_steps_sampled: 630000
    num_steps_trained: 630000
  iterations_since_restore: 630
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,630,9496.51,630000,-9.1286,-2,-10,912.86




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 631000
  custom_metrics: {}
  date: 2021-10-23_22-38-32
  done: false
  episode_len_mean: 912.78
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.127799999999851
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 766
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3589183232023761e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.1651660399304496
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.10738871155513657
          total_loss: .inf
          vf_explained_var: -0.3333335518836975
          vf_loss: 308994204.0888889
    num_agent_steps_sampled: 631000
    num_agent_steps_trained: 631000
    num_steps_sampled: 631000
    num_steps_trained: 631000
  iterations_since_restore: 631
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,631,9510.63,631000,-9.1278,-2,-10,912.78




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 632000
  custom_metrics: {}
  date: 2021-10-23_22-38-48
  done: false
  episode_len_mean: 912.35
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.12349999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 767
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0383774848035636e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.27985760304662916
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.17306412582596142
          total_loss: .inf
          vf_explained_var: 0.016947051510214806
          vf_loss: 831829404.4444444
    num_agent_steps_sampled: 632000
    num_agent_steps_trained: 632000
    num_steps_sampled: 632000
    num_steps_trained: 632000
  iterations_since_restore: 632


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,632,9525.82,632000,-9.1235,-2,-10,912.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 633000
  custom_metrics: {}
  date: 2021-10-23_22-39-00
  done: false
  episode_len_mean: 910.25
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.102499999999852
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 768
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.057566227205346e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.07887331710921394
          entropy_coeff: 0.009999999999999998
          kl: 0.0011558381905622046
          policy_loss: -0.1266257087389628
          total_loss: 353307335.9111111
          vf_explained_var: 0.16899026930332184
          vf_loss: 353307335.9111111
    num_agent_steps_sampled: 633000
    num_agent_steps_trained: 633000
    num_steps_sampled: 633000
    num_steps_trained: 633000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,633,9537.98,633000,-9.1025,-2,-10,910.25


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 634000
  custom_metrics: {}
  date: 2021-10-23_22-39-14
  done: false
  episode_len_mean: 909.89
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.09889999999985
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 769
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.528783113602673e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.011705907682577768
          entropy_coeff: 0.009999999999999998
          kl: 0.0002072111823205584
          policy_loss: 0.1274798442920049
          total_loss: 116525271.75
          vf_explained_var: 0.23149532079696655
          vf_loss: 116525271.35
    num_agent_steps_sampled: 634000
    num_agent_steps_trained: 634000
    num_steps_sampled: 634000
    num_steps_trained: 634000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,634,9551.93,634000,-9.0989,-2,-10,909.89


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 635000
  custom_metrics: {}
  date: 2021-10-23_22-39-29
  done: false
  episode_len_mean: 909.89
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.098899999999851
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 770
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.643915568013365e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.29045017150541147
          entropy_coeff: 0.009999999999999998
          kl: 0.589759105256612
          policy_loss: 0.11752487785286374
          total_loss: 2649465178.8444443
          vf_explained_var: 0.24409078061580658
          vf_loss: 2649465178.8444443
    num_agent_steps_sampled: 635000
    num_agent_steps_trained: 635000
    num_steps_sampled: 635000
    num_steps_trained: 635000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,635,9566.62,635000,-9.0989,-2,-10,909.89


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 636000
  custom_metrics: {}
  date: 2021-10-23_22-39-44
  done: false
  episode_len_mean: 909.81
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.098099999999851
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 771
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1465873352020045e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.03436923838324017
          entropy_coeff: 0.009999999999999998
          kl: 0.0004186261991719675
          policy_loss: -0.14322286976708307
          total_loss: 1977735908.9777777
          vf_explained_var: 0.21739715337753296
          vf_loss: 1977735908.9777777
    num_agent_steps_sampled: 636000
    num_agent_steps_trained: 636000
    num_steps_sampled: 636000
    num_steps_trained: 636000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,636,9581.68,636000,-9.0981,-2,-10,909.81




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 637000
  custom_metrics: {}
  date: 2021-10-23_22-39-59
  done: false
  episode_len_mean: 909.29
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.092899999999851
  episode_reward_min: -9.999999999999831
  episodes_this_iter: 1
  episodes_total: 772
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.732936676010023e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.37580361349715125
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.12030331790447235
          total_loss: .inf
          vf_explained_var: 0.24789519608020782
          vf_loss: 337197557.7638889
    num_agent_steps_sampled: 637000
    num_agent_steps_trained: 637000
    num_steps_sampled: 637000
    num_steps_trained: 637000
  iterations_since_restore: 637
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,637,9597.01,637000,-9.0929,-2,-10,909.29




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 638000
  custom_metrics: {}
  date: 2021-10-23_22-40-16
  done: false
  episode_len_mean: 908.81
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.09799999999985
  episode_reward_min: -10.509999999999842
  episodes_this_iter: 1
  episodes_total: 773
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.599405014015035e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.11933766911841101
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.06616940200328827
          total_loss: .inf
          vf_explained_var: -0.43069979548454285
          vf_loss: 106437651.37777779
    num_agent_steps_sampled: 638000
    num_agent_steps_trained: 638000
    num_steps_sampled: 638000
    num_steps_trained: 638000
  iterations_since_restore: 638

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,638,9613.74,638000,-9.098,-2,-10.51,908.81




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 639000
  custom_metrics: {}
  date: 2021-10-23_22-40-30
  done: false
  episode_len_mean: 907.7
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.08689999999985
  episode_reward_min: -10.509999999999842
  episodes_this_iter: 1
  episodes_total: 774
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2899107521022553e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.03562737272845374
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.10523883104324341
          total_loss: .inf
          vf_explained_var: 0.011964483186602592
          vf_loss: 62018804.8
    num_agent_steps_sampled: 639000
    num_agent_steps_trained: 639000
    num_steps_sampled: 639000
    num_steps_trained: 639000
  iterations_since_restore: 639
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,639,9627.61,639000,-9.0869,-2,-10.51,907.7




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 640000
  custom_metrics: {}
  date: 2021-10-23_22-40-47
  done: false
  episode_len_mean: 907.58
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.08569999999985
  episode_reward_min: -10.509999999999842
  episodes_this_iter: 1
  episodes_total: 775
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9348661281533826e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.21308012008666993
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.07634138001335991
          total_loss: .inf
          vf_explained_var: 0.07094938308000565
          vf_loss: 14602936248.88889
    num_agent_steps_sampled: 640000
    num_agent_steps_trained: 640000
    num_steps_sampled: 640000
    num_steps_trained: 640000
  iterations_since_restore: 640
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,640,9644.46,640000,-9.0857,-2,-10.51,907.58




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 641000
  custom_metrics: {}
  date: 2021-10-23_22-41-03
  done: false
  episode_len_mean: 905.26
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.07239999999985
  episode_reward_min: -10.509999999999842
  episodes_this_iter: 2
  episodes_total: 777
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9022991922300745e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.19823385212156508
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.0046460638443628945
          total_loss: .inf
          vf_explained_var: 0.18774841725826263
          vf_loss: 1318268471.1111112
    num_agent_steps_sampled: 641000
    num_agent_steps_trained: 641000
    num_steps_sampled: 641000
    num_steps_trained: 641000
  iterations_since_restore: 64

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,641,9660.6,641000,-9.0724,-2,-10.51,905.26


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 642000
  custom_metrics: {}
  date: 2021-10-23_22-41-18
  done: false
  episode_len_mean: 904.67
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.09619999999985
  episode_reward_min: -12.37999999999981
  episodes_this_iter: 1
  episodes_total: 778
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.35344878834511e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.11147293696800868
          total_loss: 255347294.2222222
          vf_explained_var: 1.1457337478759655e-07
          vf_loss: 255347294.2222222
    num_agent_steps_sampled: 642000
    num_agent_steps_trained: 642000
    num_steps_sampled: 642000
    num_steps_trained: 642000
  iterations_since_restore: 642
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,642,9676.22,642000,-9.0962,-2,-12.38,904.67




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 643000
  custom_metrics: {}
  date: 2021-10-23_22-41-58
  done: false
  episode_len_mean: 895.77
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.007199999999852
  episode_reward_min: -12.37999999999981
  episodes_this_iter: 2
  episodes_total: 780
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.176724394172555e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.3668355147043864
          total_loss: 180821217188.97778
          vf_explained_var: -6.887648140718738e-08
          vf_loss: 180821217188.97778
    num_agent_steps_sampled: 643000
    num_agent_steps_trained: 643000
    num_steps_sampled: 643000
    num_steps_trained: 643000
  iterations_since_restore: 643
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,643,9716.14,643000,-9.0072,-2,-12.38,895.77




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 644000
  custom_metrics: {}
  date: 2021-10-23_22-42-17
  done: false
  episode_len_mean: 894.24
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -8.991899999999852
  episode_reward_min: -12.37999999999981
  episodes_this_iter: 1
  episodes_total: 781
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0883621970862776e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.36743556525972154
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.1183576977915234
          total_loss: .inf
          vf_explained_var: 0.027844566851854324
          vf_loss: 3091717276012.089
    num_agent_steps_sampled: 644000
    num_agent_steps_trained: 644000
    num_steps_sampled: 644000
    num_steps_trained: 644000
  iterations_since_restore: 644


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,644,9734.66,644000,-8.9919,-2,-12.38,894.24




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 645000
  custom_metrics: {}
  date: 2021-10-23_22-42-31
  done: false
  episode_len_mean: 892.01
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.009199999999856
  episode_reward_min: -12.37999999999981
  episodes_this_iter: 1
  episodes_total: 782
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6325432956294163e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.24030644333817894
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.43745835026105245
          total_loss: .inf
          vf_explained_var: -0.6666667461395264
          vf_loss: 241541065.42222223
    num_agent_steps_sampled: 645000
    num_agent_steps_trained: 645000
    num_steps_sampled: 645000
    num_steps_trained: 645000
  iterations_since_restore: 645


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,645,9749.1,645000,-9.0092,-2,-12.38,892.01




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 646000
  custom_metrics: {}
  date: 2021-10-23_22-42-46
  done: false
  episode_len_mean: 891.38
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.052099999999855
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 783
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.448814943444125e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.1108138683769438
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.17628372775183784
          total_loss: .inf
          vf_explained_var: -0.3333335518836975
          vf_loss: 226428653.84444445
    num_agent_steps_sampled: 646000
    num_agent_steps_trained: 646000
    num_steps_sampled: 646000
    num_steps_trained: 646000
  iterations_since_restore: 646


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,646,9764.09,646000,-9.0521,-2,-14.29,891.38




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 647000
  custom_metrics: {}
  date: 2021-10-23_22-43-03
  done: false
  episode_len_mean: 890.57
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.043799999999855
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 784
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.673222415166188e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.2967047140002251
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.12651793029573227
          total_loss: .inf
          vf_explained_var: 0.5979049205780029
          vf_loss: 2906523356.977778
    num_agent_steps_sampled: 647000
    num_agent_steps_trained: 647000
    num_steps_sampled: 647000
    num_steps_trained: 647000
  iterations_since_restore: 647
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,647,9780.39,647000,-9.0438,-2,-14.29,890.57




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 648000
  custom_metrics: {}
  date: 2021-10-23_22-43-20
  done: false
  episode_len_mean: 888.65
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.053999999999855
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 785
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.509833622749281e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.2808037335673968
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.02707384212149514
          total_loss: .inf
          vf_explained_var: -0.3305385708808899
          vf_loss: 508467485.51111114
    num_agent_steps_sampled: 648000
    num_agent_steps_trained: 648000
    num_steps_sampled: 648000
    num_steps_trained: 648000
  iterations_since_restore: 648
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,648,9797.3,648000,-9.054,-2,-14.29,888.65




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 649000
  custom_metrics: {}
  date: 2021-10-23_22-43-44
  done: false
  episode_len_mean: 881.95
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.076099999999853
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 787
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.26475043412392e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.9096315834257338
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.021097106155422
          total_loss: .inf
          vf_explained_var: -0.9721798300743103
          vf_loss: 1282138458.488889
    num_agent_steps_sampled: 649000
    num_agent_steps_trained: 649000
    num_steps_sampled: 649000
    num_steps_trained: 649000
  iterations_since_restore: 649
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,649,9821.73,649000,-9.0761,-2,-14.29,881.95




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 650000
  custom_metrics: {}
  date: 2021-10-23_22-44-07
  done: false
  episode_len_mean: 874.35
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.049299999999855
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 789
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2397125651185883e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.2870812866422865
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.04986082845264011
          total_loss: .inf
          vf_explained_var: -0.3333333432674408
          vf_loss: 1307914272.1777778
    num_agent_steps_sampled: 650000
    num_agent_steps_trained: 650000
    num_steps_sampled: 650000
    num_steps_trained: 650000
  iterations_since_restore: 650

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,650,9844.26,650000,-9.0493,-2,-14.29,874.35




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 651000
  custom_metrics: {}
  date: 2021-10-23_22-44-25
  done: false
  episode_len_mean: 872.45
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.030299999999857
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 790
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8595688476778822e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.36857345236672295
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.06212416804499096
          total_loss: .inf
          vf_explained_var: -0.6666666865348816
          vf_loss: 708767594.4888889
    num_agent_steps_sampled: 651000
    num_agent_steps_trained: 651000
    num_steps_sampled: 651000
    num_steps_trained: 651000
  iterations_since_restore: 651


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,651,9862.63,651000,-9.0303,-2,-14.29,872.45




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 652000
  custom_metrics: {}
  date: 2021-10-23_22-44-40
  done: false
  episode_len_mean: 877.28
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.078599999999856
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 791
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.789353271516824e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.0655073622862498
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.8874983727931977
          total_loss: .inf
          vf_explained_var: -0.33333295583724976
          vf_loss: 684171613.3333334
    num_agent_steps_sampled: 652000
    num_agent_steps_trained: 652000
    num_steps_sampled: 652000
    num_steps_trained: 652000
  iterations_since_restore: 652
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,652,9877.95,652000,-9.0786,-2,-14.29,877.28




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 653000
  custom_metrics: {}
  date: 2021-10-23_22-44-55
  done: false
  episode_len_mean: 876.3
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.068799999999856
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 792
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.184029907275235e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.1052788022491667
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.04230329708920585
          total_loss: .inf
          vf_explained_var: 0.08489732444286346
          vf_loss: 246224557.68888888
    num_agent_steps_sampled: 653000
    num_agent_steps_trained: 653000
    num_steps_sampled: 653000
    num_steps_trained: 653000
  iterations_since_restore: 653
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,653,9892.6,653000,-9.0688,-2,-14.29,876.3




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 654000
  custom_metrics: {}
  date: 2021-10-23_22-45-12
  done: false
  episode_len_mean: 872.01
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.025899999999856
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 794
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.2760448609128525e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.49082576285840734
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.31917535561240384
          total_loss: .inf
          vf_explained_var: -0.5895342230796814
          vf_loss: 3592485447.111111
    num_agent_steps_sampled: 654000
    num_agent_steps_trained: 654000
    num_steps_sampled: 654000
    num_steps_trained: 654000
  iterations_since_restore: 654


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,654,9909.65,654000,-9.0259,-2,-14.29,872.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 655000
  custom_metrics: {}
  date: 2021-10-23_22-45-28
  done: false
  episode_len_mean: 878.58
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.091599999999856
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 795
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.414067291369277e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.8618732584847344
          entropy_coeff: 0.009999999999999998
          kl: 1.3004023790359498
          policy_loss: -0.05564630022789869
          total_loss: 2616667673.6
          vf_explained_var: -1.0
          vf_loss: 2616667673.6
    num_agent_steps_sampled: 655000
    num_agent_steps_trained: 655000
    num_steps_sampled: 655000
    num_steps_trained: 655000
  iterations_since_restore: 655

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,655,9925.86,655000,-9.0916,-2,-14.29,878.58




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 656000
  custom_metrics: {}
  date: 2021-10-23_22-45-54
  done: false
  episode_len_mean: 875.11
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.056899999999857
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 797
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4121100937053916e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.9982445895671844
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.04790240281985866
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 3477657497.6
    num_agent_steps_sampled: 656000
    num_agent_steps_trained: 656000
    num_steps_sampled: 656000
    num_steps_trained: 656000
  iterations_since_restore: 656
  node_ip: 172.17.0.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,656,9951.22,656000,-9.0569,-2,-14.29,875.11




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 657000
  custom_metrics: {}
  date: 2021-10-23_22-46-17
  done: false
  episode_len_mean: 884.84
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -9.154199999999856
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 799
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1181651405580872e-26
          cur_lr: 5.000000000000001e-05
          entropy: 1.0662490401003095
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.01109875746899181
          total_loss: .inf
          vf_explained_var: -0.9321288466453552
          vf_loss: 1634919086.9333334
    num_agent_steps_sampled: 657000
    num_agent_steps_trained: 657000
    num_steps_sampled: 657000
    num_steps_trained: 657000
  iterations_since_restore: 657


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,657,9974.07,657000,-9.1542,-2,-14.29,884.84




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 658000
  custom_metrics: {}
  date: 2021-10-23_22-46-55
  done: false
  episode_len_mean: 888.24
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -9.188199999999853
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 3
  episodes_total: 802
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1772477108371304e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.8927713082896338
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.0698060757584042
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 980218387.5555556
    num_agent_steps_sampled: 658000
    num_agent_steps_trained: 658000
    num_steps_sampled: 658000
    num_steps_trained: 658000
  iterations_since_restore: 658
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,658,10012.7,658000,-9.1882,-2.1,-14.29,888.24




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 659000
  custom_metrics: {}
  date: 2021-10-23_22-47-18
  done: false
  episode_len_mean: 880.35
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -9.109299999999855
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 804
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.765871566255697e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.9110169291496277
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.023196510018573868
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 2523540113.0666666
    num_agent_steps_sampled: 659000
    num_agent_steps_trained: 659000
    num_steps_sampled: 659000
    num_steps_trained: 659000
  iterations_since_restore: 659
  node_ip: 172.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,659,10035.3,659000,-9.1093,-2.1,-14.29,880.35




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 660000
  custom_metrics: {}
  date: 2021-10-23_22-47-40
  done: false
  episode_len_mean: 877.32
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -9.078999999999855
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 805
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.148807349383547e-26
          cur_lr: 5.000000000000001e-05
          entropy: 1.0305769317679936
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.08039725604984495
          total_loss: .inf
          vf_explained_var: 0.09508002549409866
          vf_loss: 48241046203.73333
    num_agent_steps_sampled: 660000
    num_agent_steps_trained: 660000
    num_steps_sampled: 660000
    num_steps_trained: 660000
  iterations_since_restore: 660
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,660,10056.8,660000,-9.079,-2.1,-14.29,877.32




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 661000
  custom_metrics: {}
  date: 2021-10-23_22-48-20
  done: false
  episode_len_mean: 850.56
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -8.811399999999862
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 4
  episodes_total: 809
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.072321102407532e-25
          cur_lr: 5.000000000000001e-05
          entropy: 1.1424687451786466
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.30790755086474947
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 1619722364.4444444
    num_agent_steps_sampled: 661000
    num_agent_steps_trained: 661000
    num_steps_sampled: 661000
    num_steps_trained: 661000
  iterations_since_restore: 661
  node_ip: 172.17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,661,10097.6,661000,-8.8114,-2.1,-14.29,850.56




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 662000
  custom_metrics: {}
  date: 2021-10-23_22-49-07
  done: false
  episode_len_mean: 838.73
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -8.693099999999863
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 811
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.608481653611298e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.9457278092702229
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.039563015268908604
          total_loss: .inf
          vf_explained_var: -0.6608610153198242
          vf_loss: 3083720499.2
    num_agent_steps_sampled: 662000
    num_agent_steps_trained: 662000
    num_steps_sampled: 662000
    num_steps_trained: 662000
  iterations_since_restore: 662
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,662,10144,662000,-8.6931,-2.1,-14.29,838.73




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 663000
  custom_metrics: {}
  date: 2021-10-23_22-49-22
  done: false
  episode_len_mean: 836.0
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -8.665799999999864
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 812
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4127224804169472e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.6285084201229943
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.15419377659757932
          total_loss: .inf
          vf_explained_var: 0.3231642544269562
          vf_loss: 75106327028.62222
    num_agent_steps_sampled: 663000
    num_agent_steps_trained: 663000
    num_steps_sampled: 663000
    num_steps_trained: 663000
  iterations_since_restore: 663
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,663,10159.3,663000,-8.6658,-2.1,-14.29,836




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 664000
  custom_metrics: {}
  date: 2021-10-23_22-49-51
  done: false
  episode_len_mean: 828.0
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -8.585799999999866
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 814
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6190837206254215e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.8734013067351447
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.10206933154000176
          total_loss: .inf
          vf_explained_var: -0.6666665077209473
          vf_loss: 9457561821.866667
    num_agent_steps_sampled: 664000
    num_agent_steps_trained: 664000
    num_steps_sampled: 664000
    num_steps_trained: 664000
  iterations_since_restore: 664
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,664,10188.4,664000,-8.5858,-2.1,-14.29,828




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 665000
  custom_metrics: {}
  date: 2021-10-23_22-50-19
  done: false
  episode_len_mean: 811.4
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -8.419799999999869
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 3
  episodes_total: 817
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.428625580938131e-25
          cur_lr: 5.000000000000001e-05
          entropy: 1.3015727625952827
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.06833245323763953
          total_loss: .inf
          vf_explained_var: -0.6072021722793579
          vf_loss: 1436891687.8222222
    num_agent_steps_sampled: 665000
    num_agent_steps_trained: 665000
    num_steps_sampled: 665000
    num_steps_trained: 665000
  iterations_since_restore: 665
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,665,10216.2,665000,-8.4198,-2.1,-14.29,811.4


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 666000
  custom_metrics: {}
  date: 2021-10-23_22-50-35
  done: false
  episode_len_mean: 810.86
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -8.414399999999869
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 818
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.142938371407193e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.02351205713219113
          entropy_coeff: 0.009999999999999998
          kl: 0.007959653519921832
          policy_loss: 0.031991320268975366
          total_loss: 79403055.8
          vf_explained_var: -0.3333330452442169
          vf_loss: 79403055.8
    num_agent_steps_sampled: 666000
    num_agent_steps_trained: 666000
    num_steps_sampled: 666000
    num_steps_trained: 666000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,666,10232.5,666000,-8.4144,-2.1,-14.29,810.86


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 667000
  custom_metrics: {}
  date: 2021-10-23_22-50-51
  done: false
  episode_len_mean: 809.17
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -8.397499999999868
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 819
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.142938371407193e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.12901389482948517
          entropy_coeff: 0.009999999999999998
          kl: 0.038494918706853946
          policy_loss: -0.027615878896580803
          total_loss: 682983729106.4889
          vf_explained_var: 0.06309112906455994
          vf_loss: 682983729106.4889
    num_agent_steps_sampled: 667000
    num_agent_steps_trained: 667000
    num_steps_sampled: 667000
    num_steps_trained: 667000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,667,10248.3,667000,-8.3975,-2.1,-14.29,809.17




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 668000
  custom_metrics: {}
  date: 2021-10-23_22-51-08
  done: false
  episode_len_mean: 809.43
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -8.400099999999869
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 820
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2214407557110794e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.3644813471370273
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.2711260356836849
          total_loss: .inf
          vf_explained_var: 0.27684286236763
          vf_loss: 17431955456.0
    num_agent_steps_sampled: 668000
    num_agent_steps_trained: 668000
    num_steps_sampled: 668000
    num_steps_trained: 668000
  iterations_since_restore: 668
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,668,10265.1,668000,-8.4001,-2.1,-14.29,809.43




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 669000
  custom_metrics: {}
  date: 2021-10-23_22-51-21
  done: false
  episode_len_mean: 806.64
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -8.372199999999872
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 821
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8321611335666192e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.2700521891315778
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.23954351478152805
          total_loss: .inf
          vf_explained_var: -0.6666666865348816
          vf_loss: 613011342.9333333
    num_agent_steps_sampled: 669000
    num_agent_steps_trained: 669000
    num_steps_sampled: 669000
    num_steps_trained: 669000
  iterations_since_restore: 669
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,669,10278.3,669000,-8.3722,-2.1,-14.29,806.64




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 670000
  custom_metrics: {}
  date: 2021-10-23_22-51-37
  done: false
  episode_len_mean: 805.28
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -8.358599999999871
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 822
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7482417003499287e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.23442719396617678
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.10927604105737475
          total_loss: .inf
          vf_explained_var: -0.11336604505777359
          vf_loss: 1538561481.8
    num_agent_steps_sampled: 670000
    num_agent_steps_trained: 670000
    num_steps_sampled: 670000
    num_steps_trained: 670000
  iterations_since_restore: 670
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,670,10294,670000,-8.3586,-2.1,-14.29,805.28




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 671000
  custom_metrics: {}
  date: 2021-10-23_22-52-00
  done: false
  episode_len_mean: 799.4
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -8.299799999999873
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 824
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.1223625505248934e-24
          cur_lr: 5.000000000000001e-05
          entropy: 1.0519090394179027
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.03307102591627174
          total_loss: .inf
          vf_explained_var: 0.5017011165618896
          vf_loss: 165135449292.8
    num_agent_steps_sampled: 671000
    num_agent_steps_trained: 671000
    num_steps_sampled: 671000
    num_steps_trained: 671000
  iterations_since_restore: 671
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,671,10316.8,671000,-8.2998,-2.1,-14.29,799.4




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 672000
  custom_metrics: {}
  date: 2021-10-23_22-52-29
  done: false
  episode_len_mean: 790.62
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -8.211999999999875
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 826
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.1835438257873416e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.4581221921576394
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.07099316848648919
          total_loss: .inf
          vf_explained_var: -0.06654448807239532
          vf_loss: 32305051556.97778
    num_agent_steps_sampled: 672000
    num_agent_steps_trained: 672000
    num_steps_sampled: 672000
    num_steps_trained: 672000
  iterations_since_restore: 672


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,672,10346.3,672000,-8.212,-2.1,-14.29,790.62




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 673000
  custom_metrics: {}
  date: 2021-10-23_22-52-57
  done: false
  episode_len_mean: 776.74
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -8.073199999999877
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 828
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.275315738681012e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.7129507019288011
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.06287883973369995
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 6077001301.333333
    num_agent_steps_sampled: 673000
    num_agent_steps_trained: 673000
    num_steps_sampled: 673000
    num_steps_trained: 673000
  iterations_since_restore: 673
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,673,10373.9,673000,-8.0732,-2.1,-14.29,776.74


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 674000
  custom_metrics: {}
  date: 2021-10-23_22-53-12
  done: false
  episode_len_mean: 773.93
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -8.045099999999877
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 829
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3912973608021517e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.06752592358324262
          entropy_coeff: 0.009999999999999998
          kl: 0.0042652301179866
          policy_loss: -0.07735268572966257
          total_loss: 2393865733.688889
          vf_explained_var: 0.154536634683609
          vf_loss: 2393865733.688889
    num_agent_steps_sampled: 674000
    num_agent_steps_trained: 674000
    num_steps_sampled: 674000
    num_steps_trained: 674000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,674,10389.2,674000,-8.0451,-2.1,-14.29,773.93




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 675000
  custom_metrics: {}
  date: 2021-10-23_22-53-46
  done: false
  episode_len_mean: 760.89
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.91469999999988
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 3
  episodes_total: 832
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.956486804010759e-24
          cur_lr: 5.000000000000001e-05
          entropy: 1.2243083622720508
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.011501080418626467
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 7252085791.288889
    num_agent_steps_sampled: 675000
    num_agent_steps_trained: 675000
    num_steps_sampled: 675000
    num_steps_trained: 675000
  iterations_since_restore: 675
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,675,10422.8,675000,-7.9147,-2.1,-14.29,760.89




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 676000
  custom_metrics: {}
  date: 2021-10-23_22-54-08
  done: false
  episode_len_mean: 750.68
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.812599999999882
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 834
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.043473020601614e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.6451543188757367
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.04068184461858537
          total_loss: .inf
          vf_explained_var: -0.6484314203262329
          vf_loss: 2621916017.7777777
    num_agent_steps_sampled: 676000
    num_agent_steps_trained: 676000
    num_steps_sampled: 676000
    num_steps_trained: 676000
  iterations_since_restore: 676
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,676,10445.3,676000,-7.8126,-2.1,-14.29,750.68




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 677000
  custom_metrics: {}
  date: 2021-10-23_22-54-30
  done: false
  episode_len_mean: 749.41
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.799899999999882
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 835
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.56520953090242e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.3415710899564955
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.2314079334338506
          total_loss: .inf
          vf_explained_var: 0.20088604092597961
          vf_loss: 58759485917.86667
    num_agent_steps_sampled: 677000
    num_agent_steps_trained: 677000
    num_steps_sampled: 677000
    num_steps_trained: 677000
  iterations_since_restore: 677
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,677,10466.7,677000,-7.7999,-2.1,-14.29,749.41


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 678000
  custom_metrics: {}
  date: 2021-10-23_22-54-46
  done: false
  episode_len_mean: 741.33
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.719099999999885
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 837
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.34781429635363e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.23335381547609965
          entropy_coeff: 0.009999999999999998
          kl: 0.001979083971068027
          policy_loss: 0.11709402087661955
          total_loss: 27851293536.711113
          vf_explained_var: 0.25620853900909424
          vf_loss: 27851293536.711113
    num_agent_steps_sampled: 678000
    num_agent_steps_trained: 678000
    num_steps_sampled: 678000
    num_steps_trained: 678000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,678,10482.7,678000,-7.7191,-2.1,-14.29,741.33


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 679000
  custom_metrics: {}
  date: 2021-10-23_22-55-01
  done: false
  episode_len_mean: 741.1
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.716799999999885
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 838
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.173907148176815e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.07495513806740443
          total_loss: 1744421270323.2
          vf_explained_var: -5.430645444448601e-08
          vf_loss: 1744421270323.2
    num_agent_steps_sampled: 679000
    num_agent_steps_trained: 679000
    num_steps_sampled: 679000
    num_steps_trained: 679000
  iterations_since_restore: 679
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,679,10497.6,679000,-7.7168,-2.1,-14.29,741.1


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 680000
  custom_metrics: {}
  date: 2021-10-23_22-55-17
  done: false
  episode_len_mean: 740.52
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.710999999999884
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 839
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.869535740884075e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.04496632152133518
          entropy_coeff: 0.009999999999999998
          kl: 0.0012873531811545627
          policy_loss: -0.27577702725927034
          total_loss: 169015349655051.38
          vf_explained_var: 0.01348471362143755
          vf_loss: 169015349655051.38
    num_agent_steps_sampled: 680000
    num_agent_steps_trained: 680000
    num_steps_sampled: 680000
    num_steps_trained: 680000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,680,10513.3,680000,-7.711,-2.1,-14.29,740.52




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 681000
  custom_metrics: {}
  date: 2021-10-23_22-55-48
  done: false
  episode_len_mean: 738.23
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.688099999999886
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 840
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9347678704420376e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.1934682254989942
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.19849014613363478
          total_loss: .inf
          vf_explained_var: 0.03450819477438927
          vf_loss: 834881701933.5111
    num_agent_steps_sampled: 681000
    num_agent_steps_trained: 681000
    num_steps_sampled: 681000
    num_steps_trained: 681000
  iterations_since_restore: 681
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,681,10545.1,681000,-7.6881,-2.1,-14.29,738.23




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 682000
  custom_metrics: {}
  date: 2021-10-23_22-56-06
  done: false
  episode_len_mean: 737.42
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.679999999999886
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 841
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.4021518056630565e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.29071193999714323
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.014688364499145084
          total_loss: .inf
          vf_explained_var: 0.2629297971725464
          vf_loss: 27129389636.266666
    num_agent_steps_sampled: 682000
    num_agent_steps_trained: 682000
    num_steps_sampled: 682000
    num_steps_trained: 682000
  iterations_since_restore: 682


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,682,10562.8,682000,-7.68,-2.1,-14.29,737.42




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 683000
  custom_metrics: {}
  date: 2021-10-23_22-56-23
  done: false
  episode_len_mean: 735.88
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.6645999999998855
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 842
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.6032277084945856e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.33494832184579637
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.15153486099508073
          total_loss: .inf
          vf_explained_var: -0.3333337903022766
          vf_loss: 29393930759.822224
    num_agent_steps_sampled: 683000
    num_agent_steps_trained: 683000
    num_steps_sampled: 683000
    num_steps_trained: 683000
  iterations_since_restore: 68

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,683,10579.4,683000,-7.6646,-2.1,-14.29,735.88




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 684000
  custom_metrics: {}
  date: 2021-10-23_22-56-44
  done: false
  episode_len_mean: 731.18
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.617599999999886
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 844
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.904841562741882e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.128997403383255
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.031612446573045516
          total_loss: .inf
          vf_explained_var: -0.33333343267440796
          vf_loss: 17365686954.666668
    num_agent_steps_sampled: 684000
    num_agent_steps_trained: 684000
    num_steps_sampled: 684000
    num_steps_trained: 684000
  iterations_since_restore: 684
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,684,10600.1,684000,-7.6176,-2.1,-14.29,731.18




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 685000
  custom_metrics: {}
  date: 2021-10-23_22-57-01
  done: false
  episode_len_mean: 728.63
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.5920999999998875
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 845
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.485726234411282e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.29548808071348404
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.13141683273845248
          total_loss: .inf
          vf_explained_var: 0.007309250067919493
          vf_loss: 29241805875.2
    num_agent_steps_sampled: 685000
    num_agent_steps_trained: 685000
    num_steps_sampled: 685000
    num_steps_trained: 685000
  iterations_since_restore: 685
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,685,10617.4,685000,-7.5921,-2.1,-14.29,728.63




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 686000
  custom_metrics: {}
  date: 2021-10-23_22-57-21
  done: false
  episode_len_mean: 721.95
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.52529999999989
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 847
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2285893516169232e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.4365408197045326
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.10743025541305543
          total_loss: .inf
          vf_explained_var: -0.08814328908920288
          vf_loss: 41673924132.977776
    num_agent_steps_sampled: 686000
    num_agent_steps_trained: 686000
    num_steps_sampled: 686000
    num_steps_trained: 686000
  iterations_since_restore: 686
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,686,10638,686000,-7.5253,-2.1,-14.29,721.95




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 687000
  custom_metrics: {}
  date: 2021-10-23_22-57-34
  done: false
  episode_len_mean: 720.3
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.508799999999891
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 848
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.342884027425384e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.1125705083211263
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.30612030542559093
          total_loss: .inf
          vf_explained_var: -0.5183854103088379
          vf_loss: 15225831695.644444
    num_agent_steps_sampled: 687000
    num_agent_steps_trained: 687000
    num_steps_sampled: 687000
    num_steps_trained: 687000
  iterations_since_restore: 687
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,687,10650.2,687000,-7.5088,-2.1,-14.29,720.3


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 688000
  custom_metrics: {}
  date: 2021-10-23_22-57-48
  done: false
  episode_len_mean: 719.82
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.50399999999989
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 849
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.014326041138076e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.08509716010755963
          entropy_coeff: 0.009999999999999998
          kl: 0.00116040514959271
          policy_loss: -0.08233971761332617
          total_loss: 7117666755834.312
          vf_explained_var: 0.06055848300457001
          vf_loss: 7117666755834.312
    num_agent_steps_sampled: 688000
    num_agent_steps_trained: 688000
    num_steps_sampled: 688000
    num_steps_trained: 688000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,688,10664.3,688000,-7.504,-2.1,-14.29,719.82




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 689000
  custom_metrics: {}
  date: 2021-10-23_22-58-03
  done: false
  episode_len_mean: 720.71
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.512899999999891
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 850
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.507163020569038e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.1299048086007436
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.28216854797469243
          total_loss: .inf
          vf_explained_var: 0.07160026580095291
          vf_loss: 151982992060.44446
    num_agent_steps_sampled: 689000
    num_agent_steps_trained: 689000
    num_steps_sampled: 689000
    num_steps_trained: 689000
  iterations_since_restore: 689
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,689,10679.3,689000,-7.5129,-2.1,-14.29,720.71


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 690000
  custom_metrics: {}
  date: 2021-10-23_22-58-18
  done: false
  episode_len_mean: 720.3
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.50879999999989
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 851
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.760744530853557e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.11231768495506711
          entropy_coeff: 0.009999999999999998
          kl: 0.0013116616542649556
          policy_loss: 0.18962625223729346
          total_loss: 394425406714.3111
          vf_explained_var: 0.14463461935520172
          vf_loss: 394425406714.3111
    num_agent_steps_sampled: 690000
    num_agent_steps_trained: 690000
    num_steps_sampled: 690000
    num_steps_trained: 690000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,690,10694.1,690000,-7.5088,-2.1,-14.29,720.3




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 691000
  custom_metrics: {}
  date: 2021-10-23_22-58-33
  done: false
  episode_len_mean: 720.07
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.50649999999989
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 852
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8803722654267784e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.3007370564672682
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.0050225047601593865
          total_loss: .inf
          vf_explained_var: 0.019458653405308723
          vf_loss: 190924238119.82224
    num_agent_steps_sampled: 691000
    num_agent_steps_trained: 691000
    num_steps_sampled: 691000
    num_steps_trained: 691000
  iterations_since_restore: 691

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,691,10709.4,691000,-7.5065,-2.1,-14.29,720.07




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 692000
  custom_metrics: {}
  date: 2021-10-23_22-58-50
  done: false
  episode_len_mean: 720.45
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.510299999999888
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 853
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.820558398140168e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.6002530177227325
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.022345231970151265
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 66298942250.666664
    num_agent_steps_sampled: 692000
    num_agent_steps_trained: 692000
    num_steps_sampled: 692000
    num_steps_trained: 692000
  iterations_since_restore: 692
  node_ip: 172.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,692,10726.6,692000,-7.5103,-2.1,-14.29,720.45


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 693000
  custom_metrics: {}
  date: 2021-10-23_22-59-07
  done: false
  episode_len_mean: 717.42
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.479999999999889
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 855
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.2308375972102525e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.24079666799969143
          entropy_coeff: 0.009999999999999998
          kl: 0.0022521719279666287
          policy_loss: 0.031096384550134342
          total_loss: 52260524646.4
          vf_explained_var: 0.07429508864879608
          vf_loss: 52260524646.4
    num_agent_steps_sampled: 693000
    num_agent_steps_trained: 693000
    num_steps_sampled: 693000
    num_steps_trained: 693000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,693,10743.1,693000,-7.48,-2.1,-14.29,717.42




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 694000
  custom_metrics: {}
  date: 2021-10-23_22-59-29
  done: false
  episode_len_mean: 714.28
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.448599999999891
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 856
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1154187986051263e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.8424943049748739
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.09821528395016989
          total_loss: .inf
          vf_explained_var: -0.19449664652347565
          vf_loss: 136023822244.97778
    num_agent_steps_sampled: 694000
    num_agent_steps_trained: 694000
    num_steps_sampled: 694000
    num_steps_trained: 694000
  iterations_since_restore: 694


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,694,10764.8,694000,-7.4486,-2.1,-14.29,714.28




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 695000
  custom_metrics: {}
  date: 2021-10-23_22-59-46
  done: false
  episode_len_mean: 713.74
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.44319999999989
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 858
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.173128197907689e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.41507316066159144
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.05673854222728147
          total_loss: .inf
          vf_explained_var: -0.4849734306335449
          vf_loss: 20214910981.68889
    num_agent_steps_sampled: 695000
    num_agent_steps_trained: 695000
    num_steps_sampled: 695000
    num_steps_trained: 695000
  iterations_since_restore: 695
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,695,10782.1,695000,-7.4432,-2.1,-14.29,713.74




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 696000
  custom_metrics: {}
  date: 2021-10-23_23-00-05
  done: false
  episode_len_mean: 715.24
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.4581999999998905
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 859
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.759692296861536e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.40893496208720737
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.1356908931914303
          total_loss: .inf
          vf_explained_var: -0.11285801231861115
          vf_loss: 25875433033.955555
    num_agent_steps_sampled: 696000
    num_agent_steps_trained: 696000
    num_steps_sampled: 696000
    num_steps_trained: 696000
  iterations_since_restore: 696


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,696,10801.1,696000,-7.4582,-2.1,-14.29,715.24




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 697000
  custom_metrics: {}
  date: 2021-10-23_23-00-23
  done: false
  episode_len_mean: 715.41
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.45989999999989
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 860
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.1395384452923e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.45142376820246377
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.20261830273601744
          total_loss: .inf
          vf_explained_var: 0.10620108991861343
          vf_loss: 852548861496.8889
    num_agent_steps_sampled: 697000
    num_agent_steps_trained: 697000
    num_steps_sampled: 697000
    num_steps_trained: 697000
  iterations_since_restore: 697
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,697,10819.6,697000,-7.4599,-2.1,-14.29,715.41




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 698000
  custom_metrics: {}
  date: 2021-10-23_23-00-41
  done: false
  episode_len_mean: 710.33
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.40909999999989
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 862
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.070930766793845e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.3337234070731534
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.1229146338171429
          total_loss: .inf
          vf_explained_var: 0.09510394930839539
          vf_loss: 104774753109.33333
    num_agent_steps_sampled: 698000
    num_agent_steps_trained: 698000
    num_steps_sampled: 698000
    num_steps_trained: 698000
  iterations_since_restore: 698
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,698,10837,698000,-7.4091,-2.1,-14.29,710.33




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 699000
  custom_metrics: {}
  date: 2021-10-23_23-01-00
  done: false
  episode_len_mean: 708.05
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.386299999999891
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 863
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6063961501907679e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.23886902001168991
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.06954741693205303
          total_loss: .inf
          vf_explained_var: -0.666666567325592
          vf_loss: 17630071796.622223
    num_agent_steps_sampled: 699000
    num_agent_steps_trained: 699000
    num_steps_sampled: 699000
    num_steps_trained: 699000
  iterations_since_restore: 699
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,699,10856.2,699000,-7.3863,-2.1,-14.29,708.05




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 700000
  custom_metrics: {}
  date: 2021-10-23_23-01-16
  done: false
  episode_len_mean: 705.29
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.358699999999892
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 864
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4095942252861514e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.2613830741908815
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.21185174981753033
          total_loss: .inf
          vf_explained_var: -0.07391827553510666
          vf_loss: 33983542670.22222
    num_agent_steps_sampled: 700000
    num_agent_steps_trained: 700000
    num_steps_sampled: 700000
    num_steps_trained: 700000
  iterations_since_restore: 700


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,700,10872,700000,-7.3587,-2.1,-14.29,705.29


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 701000
  custom_metrics: {}
  date: 2021-10-23_23-01-33
  done: false
  episode_len_mean: 706.05
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.366299999999892
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 865
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6143913379292283e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.3370154331175144
          entropy_coeff: 0.009999999999999998
          kl: 0.010314845229123213
          policy_loss: 0.07718413174152375
          total_loss: 22842294419.91111
          vf_explained_var: -0.36618831753730774
          vf_loss: 22842294419.91111
    num_agent_steps_sampled: 701000
    num_agent_steps_trained: 701000
    num_steps_sampled: 701000
    num_steps_trained: 701000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,701,10889.1,701000,-7.3663,-2.1,-14.29,706.05




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 702000
  custom_metrics: {}
  date: 2021-10-23_23-01-55
  done: false
  episode_len_mean: 698.7
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.292799999999893
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 867
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6143913379292283e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.5292325068679121
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.30121830138895245
          total_loss: .inf
          vf_explained_var: -0.05170735344290733
          vf_loss: 102877778705.06667
    num_agent_steps_sampled: 702000
    num_agent_steps_trained: 702000
    num_steps_sampled: 702000
    num_steps_trained: 702000
  iterations_since_restore: 702
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,702,10911.2,702000,-7.2928,-2.1,-14.29,698.7




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 703000
  custom_metrics: {}
  date: 2021-10-23_23-02-54
  done: false
  episode_len_mean: 676.0
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.0657999999999
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 4
  episodes_total: 871
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.421587006893841e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.8747790892918904
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.017889368699656592
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 36351997292.08889
    num_agent_steps_sampled: 703000
    num_agent_steps_trained: 703000
    num_steps_sampled: 703000
    num_steps_trained: 703000
  iterations_since_restore: 703
  node_ip: 172.17.0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,703,10970.3,703000,-7.0658,-2.1,-14.29,676




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 704000
  custom_metrics: {}
  date: 2021-10-23_23-03-17
  done: false
  episode_len_mean: 670.34
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -7.0091999999999
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 872
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.132380510340763e-22
          cur_lr: 5.000000000000001e-05
          entropy: 1.2382903032832675
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03386857095691893
          total_loss: .inf
          vf_explained_var: -0.9908738136291504
          vf_loss: 91693939916.8
    num_agent_steps_sampled: 704000
    num_agent_steps_trained: 704000
    num_steps_sampled: 704000
    num_steps_trained: 704000
  iterations_since_restore: 704
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,704,10992.6,704000,-7.0092,-2.1,-14.29,670.34




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 705000
  custom_metrics: {}
  date: 2021-10-23_23-03-53
  done: false
  episode_len_mean: 649.61
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -6.791999999999905
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 4
  episodes_total: 876
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2198570765511142e-21
          cur_lr: 5.000000000000001e-05
          entropy: 1.011981389257643
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.02399707633174128
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 25976655462.4
    num_agent_steps_sampled: 705000
    num_agent_steps_trained: 705000
    num_steps_sampled: 705000
    num_steps_trained: 705000
  iterations_since_restore: 705
  node_ip: 172.17.0.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,705,11029,705000,-6.792,-2.1,-14.29,649.61


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 706000
  custom_metrics: {}
  date: 2021-10-23_23-04-10
  done: false
  episode_len_mean: 647.9
  episode_media: {}
  episode_reward_max: -2.099999999999999
  episode_reward_mean: -6.7649999999999055
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 1
  episodes_total: 877
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.829785614826672e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.9872619416978624
          entropy_coeff: 0.009999999999999998
          kl: 1.5757380247116088
          policy_loss: -0.05852375477552414
          total_loss: 34115002891.377777
          vf_explained_var: -0.2963881492614746
          vf_loss: 34115002891.377777
    num_agent_steps_sampled: 706000
    num_agent_steps_trained: 706000
    num_steps_sampled: 706000
    num_steps_trained: 706000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,706,11046.4,706000,-6.765,-2.1,-14.29,647.9




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 707000
  custom_metrics: {}
  date: 2021-10-23_23-04-49
  done: false
  episode_len_mean: 638.53
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -6.641599999999908
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 3
  episodes_total: 880
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7446784222400077e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.8139316419760386
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.15621274958054224
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 25420171753.244446
    num_agent_steps_sampled: 707000
    num_agent_steps_trained: 707000
    num_steps_sampled: 707000
    num_steps_trained: 707000
  iterations_since_restore: 707
  node_ip: 172.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,707,11084.8,707000,-6.6416,-2.5,-14.29,638.53




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 708000
  custom_metrics: {}
  date: 2021-10-23_23-05-12
  done: false
  episode_len_mean: 631.63
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -6.53299999999991
  episode_reward_min: -14.289999999999809
  episodes_this_iter: 2
  episodes_total: 882
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.117017633360013e-21
          cur_lr: 5.000000000000001e-05
          entropy: 1.0254936112297905
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.06938661022318734
          total_loss: .inf
          vf_explained_var: -0.717592716217041
          vf_loss: 27025922480.355556
    num_agent_steps_sampled: 708000
    num_agent_steps_trained: 708000
    num_steps_sampled: 708000
    num_steps_trained: 708000
  iterations_since_restore: 708
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,708,11107.7,708000,-6.533,-2.5,-14.29,631.63


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 709000
  custom_metrics: {}
  date: 2021-10-23_23-05-29
  done: false
  episode_len_mean: 631.56
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -6.48309999999991
  episode_reward_min: -12.739999999999938
  episodes_this_iter: 1
  episodes_total: 883
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.175526450040017e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.9528406878312429
          entropy_coeff: 0.009999999999999998
          kl: 1.591557772292031
          policy_loss: 0.04448355618450377
          total_loss: 11693705261.51111
          vf_explained_var: -0.9979416131973267
          vf_loss: 11693705261.51111
    num_agent_steps_sampled: 709000
    num_agent_steps_trained: 709000
    num_steps_sampled: 709000
    num_steps_trained: 709000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,709,11124.9,709000,-6.4831,-2.5,-12.74,631.56




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 710000
  custom_metrics: {}
  date: 2021-10-23_23-05-53
  done: false
  episode_len_mean: 626.18
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -6.40009999999991
  episode_reward_min: -12.739999999999938
  episodes_this_iter: 2
  episodes_total: 885
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.263289675060027e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.9406127446227603
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.11247139838006762
          total_loss: .inf
          vf_explained_var: 0.1612965166568756
          vf_loss: 926395253282.1333
    num_agent_steps_sampled: 710000
    num_agent_steps_trained: 710000
    num_steps_sampled: 710000
    num_steps_trained: 710000
  iterations_since_restore: 710
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,710,11148.3,710000,-6.4001,-2.5,-12.74,626.18




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 711000
  custom_metrics: {}
  date: 2021-10-23_23-06-36
  done: false
  episode_len_mean: 611.59
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -6.115899999999914
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 4
  episodes_total: 889
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3894934512590037e-20
          cur_lr: 5.000000000000001e-05
          entropy: 1.2002621988455455
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.1336243648495939
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 36902587687.82222
    num_agent_steps_sampled: 711000
    num_agent_steps_trained: 711000
    num_steps_sampled: 711000
    num_steps_trained: 711000
  iterations_since_restore: 711
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,711,11191.5,711000,-6.1159,-2.5,-9.77,611.59




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 712000
  custom_metrics: {}
  date: 2021-10-23_23-07-12
  done: false
  episode_len_mean: 598.8
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -5.987999999999917
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 3
  episodes_total: 892
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.084240176888506e-20
          cur_lr: 5.000000000000001e-05
          entropy: 1.4363418168491788
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.04080118559714821
          total_loss: .inf
          vf_explained_var: -0.668829619884491
          vf_loss: 26102082685.155556
    num_agent_steps_sampled: 712000
    num_agent_steps_trained: 712000
    num_steps_sampled: 712000
    num_steps_trained: 712000
  iterations_since_restore: 712
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,712,11228.1,712000,-5.988,-2.5,-9.77,598.8




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 713000
  custom_metrics: {}
  date: 2021-10-23_23-07-42
  done: false
  episode_len_mean: 590.58
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -5.9057999999999184
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 2
  episodes_total: 894
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1263602653327594e-20
          cur_lr: 5.000000000000001e-05
          entropy: 1.4261872039900885
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.9277232091459963
          total_loss: .inf
          vf_explained_var: -0.5818614363670349
          vf_loss: 37886361417.95556
    num_agent_steps_sampled: 713000
    num_agent_steps_trained: 713000
    num_steps_sampled: 713000
    num_steps_trained: 713000
  iterations_since_restore: 713
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,713,11257.2,713000,-5.9058,-2.5,-9.77,590.58


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 714000
  custom_metrics: {}
  date: 2021-10-23_23-08-04
  done: false
  episode_len_mean: 588.19
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -5.881899999999919
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 2
  episodes_total: 896
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.6895403979991394e-20
          cur_lr: 5.000000000000001e-05
          entropy: 1.0041274348894755
          entropy_coeff: 0.009999999999999998
          kl: 1.5956202775239945
          policy_loss: -0.11094764777355724
          total_loss: 66482165464.17778
          vf_explained_var: -0.4816681742668152
          vf_loss: 66482165464.17778
    num_agent_steps_sampled: 714000
    num_agent_steps_trained: 714000
    num_steps_sampled: 714000
    num_steps_trained: 714000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,714,11279.7,714000,-5.8819,-2.5,-9.77,588.19




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 715000
  custom_metrics: {}
  date: 2021-10-23_23-08-44
  done: false
  episode_len_mean: 580.47
  episode_media: {}
  episode_reward_max: -2.4999999999999907
  episode_reward_mean: -5.8046999999999205
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 3
  episodes_total: 899
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.03431059699871e-20
          cur_lr: 5.000000000000001e-05
          entropy: 1.174041932821274
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0004888400849368837
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 19900538766.22222
    num_agent_steps_sampled: 715000
    num_agent_steps_trained: 715000
    num_steps_sampled: 715000
    num_steps_trained: 715000
  iterations_since_restore: 715
  node_ip: 172.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,715,11319.6,715000,-5.8047,-2.5,-9.77,580.47




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 716000
  custom_metrics: {}
  date: 2021-10-23_23-09-32
  done: false
  episode_len_mean: 579.28
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.79279999999992
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 2
  episodes_total: 901
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0551465895498063e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.1934309436215296
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.16046535422404606
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 73983538585.6
    num_agent_steps_sampled: 716000
    num_agent_steps_trained: 716000
    num_steps_sampled: 716000
    num_steps_trained: 716000
  iterations_since_restore: 716
  node_ip: 172.17.0.2
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,716,11367.6,716000,-5.7928,-2.24,-9.77,579.28


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 717000
  custom_metrics: {}
  date: 2021-10-23_23-09-56
  done: false
  episode_len_mean: 586.09
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.860899999999918
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 2
  episodes_total: 903
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5827198843247093e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.8974151859680811
          entropy_coeff: 0.009999999999999998
          kl: 0.6519004080030654
          policy_loss: -0.13865247981415854
          total_loss: 96705004339.2
          vf_explained_var: 0.40853649377822876
          vf_loss: 96705004339.2
    num_agent_steps_sampled: 717000
    num_agent_steps_trained: 717000
    num_steps_sampled: 717000
    num_steps_trained: 717000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,717,11391.9,717000,-5.8609,-2.24,-9.77,586.09




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 718000
  custom_metrics: {}
  date: 2021-10-23_23-10-36
  done: false
  episode_len_mean: 576.68
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.766799999999921
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 4
  episodes_total: 907
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3740798264870633e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.128702539867825
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.038791173468861316
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 18827001571.555557
    num_agent_steps_sampled: 718000
    num_agent_steps_trained: 718000
    num_steps_sampled: 718000
    num_steps_trained: 718000
  iterations_since_restore: 718
  node_ip: 172.17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,718,11431.6,718000,-5.7668,-2.24,-9.77,576.68




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 719000
  custom_metrics: {}
  date: 2021-10-23_23-11-10
  done: false
  episode_len_mean: 576.68
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.766799999999922
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 2
  episodes_total: 909
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.561119739730595e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.3768575913376278
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.07945037798749076
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 16460280587.377777
    num_agent_steps_sampled: 719000
    num_agent_steps_trained: 719000
    num_steps_sampled: 719000
    num_steps_trained: 719000
  iterations_since_restore: 719
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,719,11465.5,719000,-5.7668,-2.24,-9.77,576.68


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 720000
  custom_metrics: {}
  date: 2021-10-23_23-11-25
  done: false
  episode_len_mean: 577.52
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.775199999999922
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 1
  episodes_total: 910
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.341679609595893e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.2206636230150858
          entropy_coeff: 0.009999999999999998
          kl: 3.1601460377375283
          policy_loss: -0.0576897338239683
          total_loss: 97489713379.55556
          vf_explained_var: -0.6377882361412048
          vf_loss: 97489713379.55556
    num_agent_steps_sampled: 720000
    num_agent_steps_trained: 720000
    num_steps_sampled: 720000
    num_steps_trained: 720000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,720,11480,720000,-5.7752,-2.24,-9.77,577.52




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 721000
  custom_metrics: {}
  date: 2021-10-23_23-11-59
  done: false
  episode_len_mean: 574.78
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.74779999999992
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 3
  episodes_total: 913
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.012519414393841e-19
          cur_lr: 5.000000000000001e-05
          entropy: 1.2221671091185675
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.052746045920583934
          total_loss: .inf
          vf_explained_var: -0.5275697708129883
          vf_loss: 17009665046.755556
    num_agent_steps_sampled: 721000
    num_agent_steps_trained: 721000
    num_steps_sampled: 721000
    num_steps_trained: 721000
  iterations_since_restore: 721
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,721,11514.8,721000,-5.7478,-2.24,-9.77,574.78




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 722000
  custom_metrics: {}
  date: 2021-10-23_23-12-37
  done: false
  episode_len_mean: 572.43
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.724299999999922
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 3
  episodes_total: 916
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.201877912159076e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.2743108285797966
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.031266403736339675
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 17258677361.77778
    num_agent_steps_sampled: 722000
    num_agent_steps_trained: 722000
    num_steps_sampled: 722000
    num_steps_trained: 722000
  iterations_since_restore: 722
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,722,11552.1,722000,-5.7243,-2.24,-9.77,572.43


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 723000
  custom_metrics: {}
  date: 2021-10-23_23-13-01
  done: false
  episode_len_mean: 569.21
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.692099999999923
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 2
  episodes_total: 918
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8028168682386144e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.464952003955841
          entropy_coeff: 0.009999999999999998
          kl: 1.7392025897900263
          policy_loss: -0.03525357809331682
          total_loss: 446198981700.26666
          vf_explained_var: 0.13168740272521973
          vf_loss: 446198981700.26666
    num_agent_steps_sampled: 723000
    num_agent_steps_trained: 723000
    num_steps_sampled: 723000
    num_steps_trained: 723000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,723,11576.1,723000,-5.6921,-2.24,-9.77,569.21




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 724000
  custom_metrics: {}
  date: 2021-10-23_23-13-39
  done: false
  episode_len_mean: 554.61
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.546099999999925
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 3
  episodes_total: 921
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.7042253023579207e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.214747530221939
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.01210606988105509
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 22215127825.066666
    num_agent_steps_sampled: 724000
    num_agent_steps_trained: 724000
    num_steps_sampled: 724000
    num_steps_trained: 724000
  iterations_since_restore: 724
  node_ip: 172.17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,724,11614.1,724000,-5.5461,-2.24,-9.77,554.61




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 725000
  custom_metrics: {}
  date: 2021-10-23_23-14-16
  done: false
  episode_len_mean: 541.48
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.4147999999999294
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 3
  episodes_total: 924
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.056337953536883e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.4610318183898925
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03367710030741162
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 22224817732.266666
    num_agent_steps_sampled: 725000
    num_agent_steps_trained: 725000
    num_steps_sampled: 725000
    num_steps_trained: 725000
  iterations_since_restore: 725
  node_ip: 172.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,725,11651.9,725000,-5.4148,-2.24,-9.77,541.48




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 726000
  custom_metrics: {}
  date: 2021-10-23_23-14-55
  done: false
  episode_len_mean: 537.07
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.37069999999993
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 3
  episodes_total: 927
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.084506930305324e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.45735694832272
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.019929837890797192
          total_loss: .inf
          vf_explained_var: -0.5868237614631653
          vf_loss: 23708087199.288887
    num_agent_steps_sampled: 726000
    num_agent_steps_trained: 726000
    num_steps_sampled: 726000
    num_steps_trained: 726000
  iterations_since_restore: 726
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,726,11690.6,726000,-5.3707,-2.24,-9.77,537.07




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 727000
  custom_metrics: {}
  date: 2021-10-23_23-15-42
  done: false
  episode_len_mean: 528.81
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.288099999999932
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 3
  episodes_total: 930
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.126760395457983e-18
          cur_lr: 5.000000000000001e-05
          entropy: 1.4520600285795
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.12650103701485527
          total_loss: .inf
          vf_explained_var: -0.058957360684871674
          vf_loss: 29814900235.377777
    num_agent_steps_sampled: 727000
    num_agent_steps_trained: 727000
    num_steps_sampled: 727000
    num_steps_trained: 727000
  iterations_since_restore: 727
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,727,11737.1,727000,-5.2881,-2.24,-9.77,528.81




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 728000
  custom_metrics: {}
  date: 2021-10-23_23-16-14
  done: false
  episode_len_mean: 529.82
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.298199999999931
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 2
  episodes_total: 932
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3690140593186973e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.6588226543532478
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.021577782556414606
          total_loss: .inf
          vf_explained_var: -0.9479402303695679
          vf_loss: 58239640644.26667
    num_agent_steps_sampled: 728000
    num_agent_steps_trained: 728000
    num_steps_sampled: 728000
    num_steps_trained: 728000
  iterations_since_restore: 728
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,728,11769.4,728000,-5.2982,-2.24,-9.77,529.82




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 729000
  custom_metrics: {}
  date: 2021-10-23_23-16-45
  done: false
  episode_len_mean: 522.8
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.227999999999932
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 3
  episodes_total: 935
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0535210889780463e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.2228986514939202
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.12028069868683815
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 13794446244.977777
    num_agent_steps_sampled: 729000
    num_agent_steps_trained: 729000
    num_steps_sampled: 729000
    num_steps_trained: 729000
  iterations_since_restore: 729
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,729,11799.8,729000,-5.228,-2.24,-9.77,522.8




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 730000
  custom_metrics: {}
  date: 2021-10-23_23-17-14
  done: false
  episode_len_mean: 518.51
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.185099999999934
  episode_reward_min: -9.769999999999836
  episodes_this_iter: 2
  episodes_total: 937
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.08028163346707e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.4659450623724195
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.16008868782470623
          total_loss: .inf
          vf_explained_var: -0.8494405150413513
          vf_loss: 25528298001.066666
    num_agent_steps_sampled: 730000
    num_agent_steps_trained: 730000
    num_steps_sampled: 730000
    num_steps_trained: 730000
  iterations_since_restore: 730
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,730,11829.7,730000,-5.1851,-2.24,-9.77,518.51




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 731000
  custom_metrics: {}
  date: 2021-10-23_23-17-43
  done: false
  episode_len_mean: 510.23
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.102299999999935
  episode_reward_min: -9.649999999999839
  episodes_this_iter: 2
  episodes_total: 939
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.620422450200605e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.1480449848704868
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.18775237848361334
          total_loss: .inf
          vf_explained_var: -0.81831294298172
          vf_loss: 39133430613.333336
    num_agent_steps_sampled: 731000
    num_agent_steps_trained: 731000
    num_steps_sampled: 731000
    num_steps_trained: 731000
  iterations_since_restore: 731
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,731,11858.3,731000,-5.1023,-2.24,-9.65,510.23


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 732000
  custom_metrics: {}
  date: 2021-10-23_23-18-04
  done: false
  episode_len_mean: 504.75
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -5.047499999999936
  episode_reward_min: -9.649999999999839
  episodes_this_iter: 2
  episodes_total: 941
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.930633675300908e-17
          cur_lr: 5.000000000000001e-05
          entropy: 1.2498456491364374
          entropy_coeff: 0.009999999999999998
          kl: 2.264694005250931
          policy_loss: -0.02611172335843245
          total_loss: 537185459313.7778
          vf_explained_var: 0.2237139642238617
          vf_loss: 537185459313.7778
    num_agent_steps_sampled: 732000
    num_agent_steps_trained: 732000
    num_steps_sampled: 732000
    num_steps_trained: 732000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,732,11878.9,732000,-5.0475,-2.24,-9.65,504.75




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 733000
  custom_metrics: {}
  date: 2021-10-23_23-18-48
  done: false
  episode_len_mean: 489.2
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -4.891999999999939
  episode_reward_min: -9.649999999999839
  episodes_this_iter: 3
  episodes_total: 944
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0395950512951359e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.0201342933707767
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.09930952986081441
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 12885025553.066668
    num_agent_steps_sampled: 733000
    num_agent_steps_trained: 733000
    num_steps_sampled: 733000
    num_steps_trained: 733000
  iterations_since_restore: 733
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,733,11923.5,733000,-4.892,-2.24,-9.65,489.2




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 734000
  custom_metrics: {}
  date: 2021-10-23_23-19-25
  done: false
  episode_len_mean: 477.69
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -4.776899999999942
  episode_reward_min: -9.649999999999839
  episodes_this_iter: 3
  episodes_total: 947
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5593925769427044e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.5852321585019429
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.04305100076728397
          total_loss: .inf
          vf_explained_var: -0.604274570941925
          vf_loss: 55719570255.64445
    num_agent_steps_sampled: 734000
    num_agent_steps_trained: 734000
    num_steps_sampled: 734000
    num_steps_trained: 734000
  iterations_since_restore: 734
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,734,11960.5,734000,-4.7769,-2.24,-9.65,477.69




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 735000
  custom_metrics: {}
  date: 2021-10-23_23-19-58
  done: false
  episode_len_mean: 459.79
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -4.597899999999946
  episode_reward_min: -9.649999999999839
  episodes_this_iter: 3
  episodes_total: 950
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.339088865414055e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.3486541032791137
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.017370726664861044
          total_loss: .inf
          vf_explained_var: -0.1314651370048523
          vf_loss: 14383688800.711111
    num_agent_steps_sampled: 735000
    num_agent_steps_trained: 735000
    num_steps_sampled: 735000
    num_steps_trained: 735000
  iterations_since_restore: 735
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,735,11992.8,735000,-4.5979,-2.24,-9.65,459.79


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 736000
  custom_metrics: {}
  date: 2021-10-23_23-20-23
  done: false
  episode_len_mean: 453.51
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -4.535099999999947
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 952
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5086332981210836e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.1533758733007642
          entropy_coeff: 0.009999999999999998
          kl: 2.2435973910821807
          policy_loss: -0.1401570737361908
          total_loss: 37359956809.95556
          vf_explained_var: -0.6739296913146973
          vf_loss: 37359956809.95556
    num_agent_steps_sampled: 736000
    num_agent_steps_trained: 736000
    num_steps_sampled: 736000
    num_steps_trained: 736000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,736,12018.2,736000,-4.5351,-2.24,-9.92,453.51




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 737000
  custom_metrics: {}
  date: 2021-10-23_23-20-53
  done: false
  episode_len_mean: 447.62
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -4.476199999999948
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 954
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.262949947181627e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.9578823354509142
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.005667555663320754
          total_loss: .inf
          vf_explained_var: -0.4777071177959442
          vf_loss: 137047883019.37778
    num_agent_steps_sampled: 737000
    num_agent_steps_trained: 737000
    num_steps_sampled: 737000
    num_steps_trained: 737000
  iterations_since_restore: 737
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,737,12048,737000,-4.4762,-2.24,-9.92,447.62




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 738000
  custom_metrics: {}
  date: 2021-10-23_23-21-38
  done: false
  episode_len_mean: 429.69
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -4.296899999999953
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 4
  episodes_total: 958
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.894424920772442e-16
          cur_lr: 5.000000000000001e-05
          entropy: 1.1652978281180064
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.020486880818174943
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 14691313487.644444
    num_agent_steps_sampled: 738000
    num_agent_steps_trained: 738000
    num_steps_sampled: 738000
    num_steps_trained: 738000
  iterations_since_restore: 738
  node_ip: 172.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,738,12093.1,738000,-4.2969,-2.24,-9.92,429.69




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 739000
  custom_metrics: {}
  date: 2021-10-23_23-22-36
  done: false
  episode_len_mean: 415.92
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -4.159199999999955
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 961
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.184163738115866e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.3841738806830512
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.01337358984682295
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 12800407631.644444
    num_agent_steps_sampled: 739000
    num_agent_steps_trained: 739000
    num_steps_sampled: 739000
    num_steps_trained: 739000
  iterations_since_restore: 739
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,739,12150.8,739000,-4.1592,-2.24,-9.92,415.92


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 740000
  custom_metrics: {}
  date: 2021-10-23_23-22-58
  done: false
  episode_len_mean: 410.06
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -4.1005999999999565
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 963
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7762456071737993e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.2935793823666042
          entropy_coeff: 0.009999999999999998
          kl: 1.390391457411978
          policy_loss: -0.1033367524544398
          total_loss: 312158031780.9778
          vf_explained_var: 0.03690909966826439
          vf_loss: 312158031780.9778
    num_agent_steps_sampled: 740000
    num_agent_steps_trained: 740000
    num_steps_sampled: 740000
    num_steps_trained: 740000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,740,12172.5,740000,-4.1006,-2.24,-9.92,410.06




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 741000
  custom_metrics: {}
  date: 2021-10-23_23-23-40
  done: false
  episode_len_mean: 397.13
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.971299999999959
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 966
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6643684107606976e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.3154376970397101
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.012108968736396896
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 11341220767.288889
    num_agent_steps_sampled: 741000
    num_agent_steps_trained: 741000
    num_steps_sampled: 741000
    num_steps_trained: 741000
  iterations_since_restore: 741
  node_ip: 172.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,741,12215.3,741000,-3.9713,-2.24,-9.92,397.13




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 742000
  custom_metrics: {}
  date: 2021-10-23_23-24-16
  done: false
  episode_len_mean: 392.78
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.9277999999999604
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 969
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.996552616141047e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.3114089137978024
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.07576210637732099
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 9211378744.88889
    num_agent_steps_sampled: 742000
    num_agent_steps_trained: 742000
    num_steps_sampled: 742000
    num_steps_trained: 742000
  iterations_since_restore: 742
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,742,12251.3,742000,-3.9278,-2.24,-9.92,392.78




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 743000
  custom_metrics: {}
  date: 2021-10-23_23-24-47
  done: false
  episode_len_mean: 394.89
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.94889999999996
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 972
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.994828924211573e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.4340255498886108
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.041788109888633095
          total_loss: .inf
          vf_explained_var: -0.555104672908783
          vf_loss: 183187208519.1111
    num_agent_steps_sampled: 743000
    num_agent_steps_trained: 743000
    num_steps_sampled: 743000
    num_steps_trained: 743000
  iterations_since_restore: 743
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,743,12282.3,743000,-3.9489,-2.24,-9.92,394.89




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 744000
  custom_metrics: {}
  date: 2021-10-23_23-25-21
  done: false
  episode_len_mean: 392.41
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.924099999999961
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 975
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.992243386317354e-15
          cur_lr: 5.000000000000001e-05
          entropy: 1.3463167799843683
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.05126592719720469
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 6783084771.555555
    num_agent_steps_sampled: 744000
    num_agent_steps_trained: 744000
    num_steps_sampled: 744000
    num_steps_trained: 744000
  iterations_since_restore: 744
  node_ip: 172.17.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,744,12316.2,744000,-3.9241,-2.24,-9.92,392.41


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 745000
  custom_metrics: {}
  date: 2021-10-23_23-25-45
  done: false
  episode_len_mean: 392.54
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.925399999999961
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 977
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3488365079476034e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.5631095237202115
          entropy_coeff: 0.009999999999999998
          kl: 2.020967059665256
          policy_loss: -0.10756610458095868
          total_loss: 426498458601.24445
          vf_explained_var: 0.47373467683792114
          vf_loss: 426498458601.24445
    num_agent_steps_sampled: 745000
    num_agent_steps_trained: 745000
    num_steps_sampled: 745000
    num_steps_trained: 745000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,745,12339.7,745000,-3.9254,-2.24,-9.92,392.54




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 746000
  custom_metrics: {}
  date: 2021-10-23_23-26-21
  done: false
  episode_len_mean: 391.33
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.9132999999999605
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 980
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.023254761921406e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2992015182971954
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.16693908977839683
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 11287130318.222221
    num_agent_steps_sampled: 746000
    num_agent_steps_trained: 746000
    num_steps_sampled: 746000
    num_steps_trained: 746000
  iterations_since_restore: 746
  node_ip: 172.17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,746,12376,746000,-3.9133,-2.24,-9.92,391.33




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 747000
  custom_metrics: {}
  date: 2021-10-23_23-26-50
  done: false
  episode_len_mean: 390.75
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.9074999999999607
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 982
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.034882142882108e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.498422336578369
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.08361738464898533
          total_loss: .inf
          vf_explained_var: -0.2505077123641968
          vf_loss: 113862413971.91112
    num_agent_steps_sampled: 747000
    num_agent_steps_trained: 747000
    num_steps_sampled: 747000
    num_steps_trained: 747000
  iterations_since_restore: 747
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,747,12404.6,747000,-3.9075,-2.24,-9.92,390.75




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 748000
  custom_metrics: {}
  date: 2021-10-23_23-27-11
  done: false
  episode_len_mean: 385.91
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.8590999999999616
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 1
  episodes_total: 983
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.552323214323162e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.4134084741274515
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.017679238071044285
          total_loss: .inf
          vf_explained_var: 0.29421645402908325
          vf_loss: 153240542139.73334
    num_agent_steps_sampled: 748000
    num_agent_steps_trained: 748000
    num_steps_sampled: 748000
    num_steps_trained: 748000
  iterations_since_restore: 748
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,748,12425.6,748000,-3.8591,-2.24,-9.92,385.91




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 749000
  custom_metrics: {}
  date: 2021-10-23_23-27-40
  done: false
  episode_len_mean: 386.57
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.8656999999999617
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 986
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.828484821484745e-14
          cur_lr: 5.000000000000001e-05
          entropy: 1.2276170591513316
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.08040701155550778
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 21599147992.177776
    num_agent_steps_sampled: 749000
    num_agent_steps_trained: 749000
    num_steps_sampled: 749000
    num_steps_trained: 749000
  iterations_since_restore: 749
  node_ip: 172.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,749,12455,749000,-3.8657,-2.24,-9.92,386.57




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 750000
  custom_metrics: {}
  date: 2021-10-23_23-28-01
  done: false
  episode_len_mean: 388.05
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.880499999999962
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 1
  episodes_total: 987
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0242727232227117e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.3599526683489482
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.029334953758451673
          total_loss: .inf
          vf_explained_var: -0.2881360650062561
          vf_loss: 64488690039.46667
    num_agent_steps_sampled: 750000
    num_agent_steps_trained: 750000
    num_steps_sampled: 750000
    num_steps_trained: 750000
  iterations_since_restore: 750
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,750,12476.1,750000,-3.8805,-2.24,-9.92,388.05




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 751000
  custom_metrics: {}
  date: 2021-10-23_23-28-51
  done: false
  episode_len_mean: 394.41
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.9440999999999597
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 990
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5364090848340672e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.3338353051079643
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03991660152872403
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 21517522594.133335
    num_agent_steps_sampled: 751000
    num_agent_steps_trained: 751000
    num_steps_sampled: 751000
    num_steps_trained: 751000
  iterations_since_restore: 751
  node_ip: 172.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,751,12525.4,751000,-3.9441,-2.24,-9.92,394.41




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 752000
  custom_metrics: {}
  date: 2021-10-23_23-29-20
  done: false
  episode_len_mean: 396.26
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.9625999999999597
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 993
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3046136272511004e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.5672098451190524
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.04068615978790654
          total_loss: .inf
          vf_explained_var: -0.29503872990608215
          vf_loss: 41099480302.933334
    num_agent_steps_sampled: 752000
    num_agent_steps_trained: 752000
    num_steps_sampled: 752000
    num_steps_trained: 752000
  iterations_since_restore: 752

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,752,12555,752000,-3.9626,-2.24,-9.92,396.26




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 753000
  custom_metrics: {}
  date: 2021-10-23_23-29-55
  done: false
  episode_len_mean: 390.56
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.90559999999996
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 995
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4569204408766517e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.362116731537713
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03651686223844687
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 7947824412.444445
    num_agent_steps_sampled: 753000
    num_agent_steps_trained: 753000
    num_steps_sampled: 753000
    num_steps_trained: 753000
  iterations_since_restore: 753
  node_ip: 172.17.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,753,12590,753000,-3.9056,-2.24,-9.92,390.56




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 754000
  custom_metrics: {}
  date: 2021-10-23_23-30-19
  done: false
  episode_len_mean: 392.02
  episode_media: {}
  episode_reward_max: -2.239999999999996
  episode_reward_mean: -3.9201999999999604
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 997
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.185380661314979e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.4421187573009067
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.5768602174189356
          total_loss: .inf
          vf_explained_var: -0.25881174206733704
          vf_loss: 41908491918.22222
    num_agent_steps_sampled: 754000
    num_agent_steps_trained: 754000
    num_steps_sampled: 754000
    num_steps_trained: 754000
  iterations_since_restore: 754
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,754,12613.4,754000,-3.9202,-2.24,-9.92,392.02




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 755000
  custom_metrics: {}
  date: 2021-10-23_23-30-52
  done: false
  episode_len_mean: 397.85
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.9784999999999595
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 1000
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.778070991972467e-13
          cur_lr: 5.000000000000001e-05
          entropy: 1.213802010483212
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.09554267244206535
          total_loss: .inf
          vf_explained_var: -0.9996201992034912
          vf_loss: 32757574960.355556
    num_agent_steps_sampled: 755000
    num_agent_steps_trained: 755000
    num_steps_sampled: 755000
    num_steps_trained: 755000
  iterations_since_restore: 755


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,755,12646.3,755000,-3.9785,-2.37,-9.92,397.85




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 756000
  custom_metrics: {}
  date: 2021-10-23_23-31-14
  done: false
  episode_len_mean: 398.06
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.9805999999999586
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 1002
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1667106487958701e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.4610855963495042
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.10412911044226753
          total_loss: .inf
          vf_explained_var: -0.07662809640169144
          vf_loss: 696456674508.8
    num_agent_steps_sampled: 756000
    num_agent_steps_trained: 756000
    num_steps_sampled: 756000
    num_steps_trained: 756000
  iterations_since_restore: 756
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,756,12668.6,756000,-3.9806,-2.37,-9.92,398.06




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 757000
  custom_metrics: {}
  date: 2021-10-23_23-31-49
  done: false
  episode_len_mean: 396.15
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.9614999999999596
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 1004
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7500659731938054e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.3993406494458516
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.013252991996705533
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 16747399600.355556
    num_agent_steps_sampled: 757000
    num_agent_steps_trained: 757000
    num_steps_sampled: 757000
    num_steps_trained: 757000
  iterations_since_restore: 757
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,757,12703.6,757000,-3.9615,-2.37,-9.92,396.15




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 758000
  custom_metrics: {}
  date: 2021-10-23_23-32-17
  done: false
  episode_len_mean: 400.18
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -4.001799999999959
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 1007
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.625098959790708e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.2728194733460745
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.09188564552201164
          total_loss: .inf
          vf_explained_var: -0.27220073342323303
          vf_loss: 17902530412.08889
    num_agent_steps_sampled: 758000
    num_agent_steps_trained: 758000
    num_steps_sampled: 758000
    num_steps_trained: 758000
  iterations_since_restore: 758


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,758,12730.9,758000,-4.0018,-2.37,-9.92,400.18




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 759000
  custom_metrics: {}
  date: 2021-10-23_23-32-50
  done: false
  episode_len_mean: 401.48
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -4.014799999999958
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 1009
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.937648439686061e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.5763415694236755
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.021588357496592735
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 16042972233.955555
    num_agent_steps_sampled: 759000
    num_agent_steps_trained: 759000
    num_steps_sampled: 759000
    num_steps_trained: 759000
  iterations_since_restore: 759
  node_ip: 172

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,759,12763.9,759000,-4.0148,-2.37,-9.92,401.48




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 760000
  custom_metrics: {}
  date: 2021-10-23_23-33-13
  done: false
  episode_len_mean: 393.93
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.939299999999959
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 1011
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.906472659529094e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.4310880144437155
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0020879319144619835
          total_loss: .inf
          vf_explained_var: -0.5902113914489746
          vf_loss: 34006839159.466667
    num_agent_steps_sampled: 760000
    num_agent_steps_trained: 760000
    num_steps_sampled: 760000
    num_steps_trained: 760000
  iterations_since_restore: 76

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,760,12787.4,760000,-3.9393,-2.37,-9.92,393.93




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 761000
  custom_metrics: {}
  date: 2021-10-23_23-33-48
  done: false
  episode_len_mean: 397.94
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.979399999999959
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 1014
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.859708989293636e-12
          cur_lr: 5.000000000000001e-05
          entropy: 1.6848715278837416
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03454368503557311
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 17398694240.711113
    num_agent_steps_sampled: 761000
    num_agent_steps_trained: 761000
    num_steps_sampled: 761000
    num_steps_trained: 761000
  iterations_since_restore: 761
  node_ip: 172.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,761,12822.3,761000,-3.9794,-2.37,-9.92,397.94




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 762000
  custom_metrics: {}
  date: 2021-10-23_23-34-21
  done: false
  episode_len_mean: 398.59
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.9858999999999596
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 1016
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3289563483940457e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.7456743372811212
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.006854384806421068
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 6500041551.644444
    num_agent_steps_sampled: 762000
    num_agent_steps_trained: 762000
    num_steps_sampled: 762000
    num_steps_trained: 762000
  iterations_since_restore: 762
  node_ip: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,762,12855.5,762000,-3.9859,-2.37,-9.92,398.59




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 763000
  custom_metrics: {}
  date: 2021-10-23_23-34-45
  done: false
  episode_len_mean: 398.93
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.9892999999999588
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 1018
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.993434522591069e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.3397712767124177
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.12734551429748536
          total_loss: .inf
          vf_explained_var: -0.067045658826828
          vf_loss: 397493258558.57776
    num_agent_steps_sampled: 763000
    num_agent_steps_trained: 763000
    num_steps_sampled: 763000
    num_steps_trained: 763000
  iterations_since_restore: 763


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,763,12879,763000,-3.9893,-2.37,-9.92,398.93




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 764000
  custom_metrics: {}
  date: 2021-10-23_23-35-39
  done: false
  episode_len_mean: 398.45
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.984499999999959
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 4
  episodes_total: 1022
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.990151783886603e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.488732682334052
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.14042121296127638
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 6013042628.266666
    num_agent_steps_sampled: 764000
    num_agent_steps_trained: 764000
    num_steps_sampled: 764000
    num_steps_trained: 764000
  iterations_since_restore: 764
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,764,12932.8,764000,-3.9845,-2.37,-9.92,398.45




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 765000
  custom_metrics: {}
  date: 2021-10-23_23-36-13
  done: false
  episode_len_mean: 399.13
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.991299999999959
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 1024
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.4852276758299054e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.8874131679534911
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.013520308697803154
          total_loss: .inf
          vf_explained_var: -0.6599072813987732
          vf_loss: 38620574970.31111
    num_agent_steps_sampled: 765000
    num_agent_steps_trained: 765000
    num_steps_sampled: 765000
    num_steps_trained: 765000
  iterations_since_restore: 765

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,765,12967.6,765000,-3.9913,-2.37,-9.92,399.13




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 766000
  custom_metrics: {}
  date: 2021-10-23_23-36-43
  done: false
  episode_len_mean: 401.32
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -4.013199999999958
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 1027
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.727841513744854e-11
          cur_lr: 5.000000000000001e-05
          entropy: 1.385336947441101
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03977904427382681
          total_loss: .inf
          vf_explained_var: -0.14040334522724152
          vf_loss: 15116196821.333334
    num_agent_steps_sampled: 766000
    num_agent_steps_trained: 766000
    num_steps_sampled: 766000
    num_steps_trained: 766000
  iterations_since_restore: 766


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,766,12997.3,766000,-4.0132,-2.37,-9.92,401.32




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 767000
  custom_metrics: {}
  date: 2021-10-23_23-37-23
  done: false
  episode_len_mean: 399.31
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.9930999999999592
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 1030
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0091762270617284e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0560619824462467
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.021296905374361408
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 8601566566.4
    num_agent_steps_sampled: 767000
    num_agent_steps_trained: 767000
    num_steps_sampled: 767000
    num_steps_trained: 767000
  iterations_since_restore: 767
  node_ip: 172.17.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,767,13036.9,767000,-3.9931,-2.37,-9.92,399.31




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 768000
  custom_metrics: {}
  date: 2021-10-23_23-38-00
  done: false
  episode_len_mean: 396.85
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.9684999999999593
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 3
  episodes_total: 1033
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.513764340592593e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.9233325017823113
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03582080871694618
          total_loss: .inf
          vf_explained_var: -0.4086254835128784
          vf_loss: 21516761395.2
    num_agent_steps_sampled: 768000
    num_agent_steps_trained: 768000
    num_steps_sampled: 768000
    num_steps_trained: 768000
  iterations_since_restore: 768
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,768,13073.6,768000,-3.9685,-2.37,-9.92,396.85


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 769000
  custom_metrics: {}
  date: 2021-10-23_23-38-23
  done: false
  episode_len_mean: 400.06
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -4.0005999999999595
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 1035
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.270646510888889e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2204446408483718
          entropy_coeff: 0.009999999999999998
          kl: 1.893497316042582
          policy_loss: -0.03525027073919773
          total_loss: 147497091709.15555
          vf_explained_var: 0.20346683263778687
          vf_loss: 147497091709.15555
    num_agent_steps_sampled: 769000
    num_agent_steps_trained: 769000
    num_steps_sampled: 769000
    num_steps_trained: 769000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,769,13096.6,769000,-4.0006,-2.37,-9.92,400.06




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 770000
  custom_metrics: {}
  date: 2021-10-23_23-39-09
  done: false
  episode_len_mean: 391.81
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.918099999999961
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 4
  episodes_total: 1039
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.4059697663333337e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.3585263629754385
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.04418626179297765
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 4558910803.2
    num_agent_steps_sampled: 770000
    num_agent_steps_trained: 770000
    num_steps_sampled: 770000
    num_steps_trained: 770000
  iterations_since_restore: 770
  node_ip: 172.17.0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,770,13143.1,770000,-3.9181,-2.37,-9.92,391.81




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 771000
  custom_metrics: {}
  date: 2021-10-23_23-39-58
  done: false
  episode_len_mean: 384.81
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.8480999999999623
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 4
  episodes_total: 1043
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.108954649500001e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.2350034634272258
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.15726876904567083
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 3366784516.266667
    num_agent_steps_sampled: 771000
    num_agent_steps_trained: 771000
    num_steps_sampled: 771000
    num_steps_trained: 771000
  iterations_since_restore: 771
  node_ip: 172.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,771,13191.8,771000,-3.8481,-2.37,-9.92,384.81


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 772000
  custom_metrics: {}
  date: 2021-10-23_23-40-17
  done: false
  episode_len_mean: 391.13
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.9112999999999607
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 1
  episodes_total: 1044
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.663431974250002e-10
          cur_lr: 5.000000000000001e-05
          entropy: 1.0095360537370046
          entropy_coeff: 0.009999999999999998
          kl: 3.6282978958553738
          policy_loss: -0.06781012904312876
          total_loss: 1822025678119.8223
          vf_explained_var: -0.3273313045501709
          vf_loss: 1822025678119.8223
    num_agent_steps_sampled: 772000
    num_agent_steps_trained: 772000
    num_steps_sampled: 772000
    num_steps_trained: 772000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,772,13210.8,772000,-3.9113,-2.37,-9.92,391.13




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 773000
  custom_metrics: {}
  date: 2021-10-23_23-41-02
  done: false
  episode_len_mean: 388.11
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.881099999999961
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 4
  episodes_total: 1048
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1495147961374995e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.3765994787216187
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.032027388943566215
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 10429532070.4
    num_agent_steps_sampled: 773000
    num_agent_steps_trained: 773000
    num_steps_sampled: 773000
    num_steps_trained: 773000
  iterations_since_restore: 773
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,773,13255.9,773000,-3.8811,-2.37,-9.92,388.11




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 774000
  custom_metrics: {}
  date: 2021-10-23_23-41-49
  done: false
  episode_len_mean: 388.8
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.887999999999961
  episode_reward_min: -9.919999999999833
  episodes_this_iter: 2
  episodes_total: 1050
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.72427219420625e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.15839474995931
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.007939931253592174
          total_loss: .inf
          vf_explained_var: -0.026958772912621498
          vf_loss: 409726061886.57776
    num_agent_steps_sampled: 774000
    num_agent_steps_trained: 774000
    num_steps_sampled: 774000
    num_steps_trained: 774000
  iterations_since_restore: 774
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,774,13302.8,774000,-3.888,-2.37,-9.92,388.8




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 775000
  custom_metrics: {}
  date: 2021-10-23_23-42-34
  done: false
  episode_len_mean: 379.99
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.7998999999999614
  episode_reward_min: -8.989999999999853
  episodes_this_iter: 4
  episodes_total: 1054
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5864082913093752e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.6563172115219964
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.06805898861752616
          total_loss: .inf
          vf_explained_var: -0.2697437107563019
          vf_loss: 54357357107.2
    num_agent_steps_sampled: 775000
    num_agent_steps_trained: 775000
    num_steps_sampled: 775000
    num_steps_trained: 775000
  iterations_since_restore: 775
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,775,13347.3,775000,-3.7999,-2.37,-8.99,379.99




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 776000
  custom_metrics: {}
  date: 2021-10-23_23-43-15
  done: false
  episode_len_mean: 380.11
  episode_media: {}
  episode_reward_max: -2.3699999999999934
  episode_reward_mean: -3.801099999999963
  episode_reward_min: -8.989999999999853
  episodes_this_iter: 4
  episodes_total: 1058
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.879612436964062e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.253655805852678
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.002919030975964334
          total_loss: .inf
          vf_explained_var: -0.8830231428146362
          vf_loss: 14985377382.4
    num_agent_steps_sampled: 776000
    num_agent_steps_trained: 776000
    num_steps_sampled: 776000
    num_steps_trained: 776000
  iterations_since_restore: 776
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,776,13388.7,776000,-3.8011,-2.37,-8.99,380.11




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 777000
  custom_metrics: {}
  date: 2021-10-23_23-43-43
  done: false
  episode_len_mean: 384.09
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.8408999999999627
  episode_reward_min: -8.989999999999853
  episodes_this_iter: 2
  episodes_total: 1060
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.819418655446093e-09
          cur_lr: 5.000000000000001e-05
          entropy: 1.195700740151935
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.009894373516241709
          total_loss: .inf
          vf_explained_var: -0.1439095437526703
          vf_loss: 178875098572.8
    num_agent_steps_sampled: 777000
    num_agent_steps_trained: 777000
    num_steps_sampled: 777000
    num_steps_trained: 777000
  iterations_since_restore: 777
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,777,13416.3,777000,-3.8409,-2.41,-8.99,384.09




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 778000
  custom_metrics: {}
  date: 2021-10-23_23-44-13
  done: false
  episode_len_mean: 384.51
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.8450999999999618
  episode_reward_min: -8.989999999999853
  episodes_this_iter: 2
  episodes_total: 1062
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.729127983169139e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.8933149755001069
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.09073861456579632
          total_loss: .inf
          vf_explained_var: -0.1711839735507965
          vf_loss: 119296601543.11111
    num_agent_steps_sampled: 778000
    num_agent_steps_trained: 778000
    num_steps_sampled: 778000
    num_steps_trained: 778000
  iterations_since_restore: 778

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,778,13446.3,778000,-3.8451,-2.41,-8.99,384.51




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 779000
  custom_metrics: {}
  date: 2021-10-23_23-44-46
  done: false
  episode_len_mean: 383.94
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.8393999999999617
  episode_reward_min: -8.989999999999853
  episodes_this_iter: 3
  episodes_total: 1065
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3093691974753707e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1153372724850972
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.020146899639318386
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 24674970578.488888
    num_agent_steps_sampled: 779000
    num_agent_steps_trained: 779000
    num_steps_sampled: 779000
    num_steps_trained: 779000
  iterations_since_restore: 779
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,779,13479.9,779000,-3.8394,-2.41,-8.99,383.94




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 780000
  custom_metrics: {}
  date: 2021-10-23_23-45-21
  done: false
  episode_len_mean: 385.33
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.8532999999999618
  episode_reward_min: -8.989999999999853
  episodes_this_iter: 3
  episodes_total: 1068
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9640537962130562e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.9536532567607032
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.015153858448482223
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 4069292839.822222
    num_agent_steps_sampled: 780000
    num_agent_steps_trained: 780000
    num_steps_sampled: 780000
    num_steps_trained: 780000
  iterations_since_restore: 780
  node_ip: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,780,13514.4,780000,-3.8533,-2.41,-8.99,385.33




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 781000
  custom_metrics: {}
  date: 2021-10-23_23-45-49
  done: false
  episode_len_mean: 386.33
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.863299999999961
  episode_reward_min: -8.989999999999853
  episodes_this_iter: 2
  episodes_total: 1070
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9460806943195853e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.1077178690168592
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.03252887771361404
          total_loss: .inf
          vf_explained_var: -0.4879007041454315
          vf_loss: 11171495446.755556
    num_agent_steps_sampled: 781000
    num_agent_steps_trained: 781000
    num_steps_sampled: 781000
    num_steps_trained: 781000
  iterations_since_restore: 781


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,781,13542.6,781000,-3.8633,-2.41,-8.99,386.33


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 782000
  custom_metrics: {}
  date: 2021-10-23_23-46-03
  done: false
  episode_len_mean: 392.71
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.92709999999996
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 1
  episodes_total: 1071
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.419121041479378e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.323425285021464
          entropy_coeff: 0.009999999999999998
          kl: 4.195343142085605
          policy_loss: -0.062157201849752
          total_loss: 75575656038.4
          vf_explained_var: -0.750244677066803
          vf_loss: 75575656038.4
    num_agent_steps_sampled: 782000
    num_agent_steps_trained: 782000
    num_steps_sampled: 782000
    num_steps_trained: 782000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,782,13556.9,782000,-3.9271,-2.41,-9.48,392.71




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 783000
  custom_metrics: {}
  date: 2021-10-23_23-46-40
  done: false
  episode_len_mean: 393.53
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.9352999999999594
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 3
  episodes_total: 1074
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.628681562219065e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.6718994630707635
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.02620156564646297
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 16237101046.755556
    num_agent_steps_sampled: 783000
    num_agent_steps_trained: 783000
    num_steps_sampled: 783000
    num_steps_trained: 783000
  iterations_since_restore: 783
  node_ip: 172

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,783,13593.1,783000,-3.9353,-2.41,-9.48,393.53


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 784000
  custom_metrics: {}
  date: 2021-10-23_23-47-04
  done: false
  episode_len_mean: 396.47
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.9646999999999593
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 1
  episodes_total: 1075
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.943022343328603e-08
          cur_lr: 5.000000000000001e-05
          entropy: 1.2134013997183906
          entropy_coeff: 0.009999999999999998
          kl: 1.4848702188995149
          policy_loss: -0.1434585879246394
          total_loss: 453362534536.5333
          vf_explained_var: 0.38027510046958923
          vf_loss: 453362534536.5333
    num_agent_steps_sampled: 784000
    num_agent_steps_trained: 784000
    num_steps_sampled: 784000
    num_steps_trained: 784000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,784,13617.2,784000,-3.9647,-2.41,-9.48,396.47




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 785000
  custom_metrics: {}
  date: 2021-10-23_23-47-31
  done: false
  episode_len_mean: 395.2
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.9519999999999595
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 3
  episodes_total: 1078
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4914533514992901e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7633133838574092
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.1057695092426406
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 11171377368.177778
    num_agent_steps_sampled: 785000
    num_agent_steps_trained: 785000
    num_steps_sampled: 785000
    num_steps_trained: 785000
  iterations_since_restore: 785
  node_ip: 172.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,785,13645,785000,-3.952,-2.41,-9.48,395.2


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 786000
  custom_metrics: {}
  date: 2021-10-23_23-47-48
  done: false
  episode_len_mean: 397.64
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.976399999999958
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 1
  episodes_total: 1079
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2371800272489344e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.1927998847431607
          entropy_coeff: 0.009999999999999998
          kl: 2.769059740172492
          policy_loss: -0.012923319223854276
          total_loss: 332370974310.4
          vf_explained_var: -0.09984269738197327
          vf_loss: 332370974310.4
    num_agent_steps_sampled: 786000
    num_agent_steps_trained: 786000
    num_steps_sampled: 786000
    num_steps_trained: 786000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,786,13661.6,786000,-3.9764,-2.41,-9.48,397.64




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 787000
  custom_metrics: {}
  date: 2021-10-23_23-48-39
  done: false
  episode_len_mean: 400.99
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -4.009899999999957
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 3
  episodes_total: 1082
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3557700408734035e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.0367045462131501
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0850816763109631
          total_loss: .inf
          vf_explained_var: -0.5922507643699646
          vf_loss: 73966494173.86667
    num_agent_steps_sampled: 787000
    num_agent_steps_trained: 787000
    num_steps_sampled: 787000
    num_steps_trained: 787000
  iterations_since_restore: 787
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,787,13712.9,787000,-4.0099,-2.41,-9.48,400.99




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 788000
  custom_metrics: {}
  date: 2021-10-23_23-49-14
  done: false
  episode_len_mean: 396.5
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.964999999999959
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 2
  episodes_total: 1084
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.033655061310104e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.7987256268660228
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.011239105659640498
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 12150356556.8
    num_agent_steps_sampled: 788000
    num_agent_steps_trained: 788000
    num_steps_sampled: 788000
    num_steps_trained: 788000
  iterations_since_restore: 788
  node_ip: 172.17.0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,788,13747.8,788000,-3.965,-2.41,-9.48,396.5


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 789000
  custom_metrics: {}
  date: 2021-10-23_23-49-29
  done: false
  episode_len_mean: 396.84
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.9683999999999586
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 1
  episodes_total: 1085
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.550482591965154e-07
          cur_lr: 5.000000000000001e-05
          entropy: 1.2493508299191793
          entropy_coeff: 0.009999999999999998
          kl: 3.675552405251397
          policy_loss: -0.04077954151564174
          total_loss: 93119862738.48889
          vf_explained_var: -0.49247679114341736
          vf_loss: 93119862738.48889
    num_agent_steps_sampled: 789000
    num_agent_steps_trained: 789000
    num_steps_sampled: 789000
    num_steps_trained: 789000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,789,13762.1,789000,-3.9684,-2.41,-9.48,396.84




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 790000
  custom_metrics: {}
  date: 2021-10-23_23-50-01
  done: false
  episode_len_mean: 397.77
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.9776999999999587
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 3
  episodes_total: 1088
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1325723887947735e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.8832793732484182
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.020335130269328754
          total_loss: .inf
          vf_explained_var: -0.43404582142829895
          vf_loss: 24366164070.4
    num_agent_steps_sampled: 790000
    num_agent_steps_trained: 790000
    num_steps_sampled: 790000
    num_steps_trained: 790000
  iterations_since_restore: 790
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,790,13794.7,790000,-3.9777,-2.41,-9.48,397.77




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 791000
  custom_metrics: {}
  date: 2021-10-23_23-50-31
  done: false
  episode_len_mean: 398.28
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -3.982799999999959
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 2
  episodes_total: 1090
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6988585831921597e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.3455333987871805
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.16830673648251426
          total_loss: .inf
          vf_explained_var: -0.500214159488678
          vf_loss: 18664534355.91111
    num_agent_steps_sampled: 791000
    num_agent_steps_trained: 791000
    num_steps_sampled: 791000
    num_steps_trained: 791000
  iterations_since_restore: 791
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,791,13824.3,791000,-3.9828,-2.41,-9.48,398.28


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 792000
  custom_metrics: {}
  date: 2021-10-23_23-50-48
  done: false
  episode_len_mean: 401.39
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -4.013899999999959
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 1
  episodes_total: 1091
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.54828787478824e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1785157442092895
          entropy_coeff: 0.009999999999999998
          kl: 2.5249761197302076
          policy_loss: 0.038066511352856956
          total_loss: 318647740734.57776
          vf_explained_var: -0.00850900448858738
          vf_loss: 318647740734.57776
    num_agent_steps_sampled: 792000
    num_agent_steps_trained: 792000
    num_steps_sampled: 792000
    num_steps_trained: 792000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,792,13840.9,792000,-4.0139,-2.41,-9.48,401.39




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 793000
  custom_metrics: {}
  date: 2021-10-23_23-51-12
  done: false
  episode_len_mean: 404.04
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -4.040399999999957
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 2
  episodes_total: 1093
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.822431812182359e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2803991794586183
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.02967875881327523
          total_loss: .inf
          vf_explained_var: -0.7721856236457825
          vf_loss: 40957280119.46667
    num_agent_steps_sampled: 793000
    num_agent_steps_trained: 793000
    num_steps_sampled: 793000
    num_steps_trained: 793000
  iterations_since_restore: 793
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,793,13865.7,793000,-4.0404,-2.41,-9.48,404.04


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 794000
  custom_metrics: {}
  date: 2021-10-23_23-51-33
  done: false
  episode_len_mean: 411.83
  episode_media: {}
  episode_reward_max: -2.4099999999999926
  episode_reward_mean: -4.118299999999956
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 2
  episodes_total: 1095
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.73364771827354e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.1114539980888367
          entropy_coeff: 0.009999999999999998
          kl: 2.0768641226821476
          policy_loss: -0.06719328595532312
          total_loss: 48469510052.977776
          vf_explained_var: -0.26950475573539734
          vf_loss: 48469510052.977776
    num_agent_steps_sampled: 794000
    num_agent_steps_trained: 794000
    num_steps_sampled: 794000
    num_steps_trained: 794000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,794,13885.9,794000,-4.1183,-2.41,-9.48,411.83




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 795000
  custom_metrics: {}
  date: 2021-10-23_23-52-09
  done: false
  episode_len_mean: 407.15
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.087699999999958
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 3
  episodes_total: 1098
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.600471577410308e-06
          cur_lr: 5.000000000000001e-05
          entropy: 1.2701274189684126
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.015262164858480294
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 22609915244.08889
    num_agent_steps_sampled: 795000
    num_agent_steps_trained: 795000
    num_steps_sampled: 795000
    num_steps_trained: 795000
  iterations_since_restore: 795
  node_ip: 172.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,795,13921.7,795000,-4.0877,-1.65,-9.48,407.15




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 796000
  custom_metrics: {}
  date: 2021-10-23_23-52-48
  done: false
  episode_len_mean: 406.54
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.081599999999957
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 3
  episodes_total: 1101
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2900707366115462e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.1379384332233005
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.028524083085358143
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 27951295761.066666
    num_agent_steps_sampled: 796000
    num_agent_steps_trained: 796000
    num_steps_sampled: 796000
    num_steps_trained: 796000
  iterations_since_restore: 796
  node_ip: 172

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,796,13960.8,796000,-4.0816,-1.65,-9.48,406.54


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 797000
  custom_metrics: {}
  date: 2021-10-23_23-53-06
  done: false
  episode_len_mean: 407.91
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.095299999999957
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 1
  episodes_total: 1102
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.93510610491732e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.1232144527965122
          entropy_coeff: 0.009999999999999998
          kl: 3.6992285198635524
          policy_loss: -0.11389723974797461
          total_loss: 45415422270.577774
          vf_explained_var: -1.0
          vf_loss: 45415422270.577774
    num_agent_steps_sampled: 797000
    num_agent_steps_trained: 797000
    num_steps_sampled: 797000
    num_steps_trained: 797000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,797,13978.9,797000,-4.0953,-1.65,-9.48,407.91




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 798000
  custom_metrics: {}
  date: 2021-10-23_23-53-40
  done: false
  episode_len_mean: 408.22
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.098399999999958
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 3
  episodes_total: 1105
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9026591573759793e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.335727106862598
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.02452166813115279
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 17860324007.822224
    num_agent_steps_sampled: 798000
    num_agent_steps_trained: 798000
    num_steps_sampled: 798000
    num_steps_trained: 798000
  iterations_since_restore: 798
  node_ip: 172.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,798,14012.7,798000,-4.0984,-1.65,-9.48,408.22


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 799000
  custom_metrics: {}
  date: 2021-10-23_23-54-04
  done: false
  episode_len_mean: 409.2
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.108199999999957
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 2
  episodes_total: 1107
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3539887360639704e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.1792992154757183
          entropy_coeff: 0.009999999999999998
          kl: 0.38959100147767456
          policy_loss: 0.009192239120602607
          total_loss: 204627926857.95557
          vf_explained_var: 0.6163172125816345
          vf_loss: 204627926857.95557
    num_agent_steps_sampled: 799000
    num_agent_steps_trained: 799000
    num_steps_sampled: 799000
    num_steps_trained: 799000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,799,14037.4,799000,-4.1082,-1.65,-9.48,409.2




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 800000
  custom_metrics: {}
  date: 2021-10-23_23-54-57
  done: false
  episode_len_mean: 408.59
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.157999999999957
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 3
  episodes_total: 1110
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.530983104095953e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.2369358128971524
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.002243018357290162
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 13676685363.2
    num_agent_steps_sampled: 800000
    num_agent_steps_trained: 800000
    num_steps_sampled: 800000
    num_steps_trained: 800000
  iterations_since_restore: 800
  node_ip: 172.17.0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,800,14089.9,800000,-4.158,-1.65,-9.48,408.59


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 801000
  custom_metrics: {}
  date: 2021-10-23_23-55-16
  done: false
  episode_len_mean: 411.87
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.190799999999957
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 1
  episodes_total: 1111
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.796474656143931e-05
          cur_lr: 5.000000000000001e-05
          entropy: 1.0877190126313103
          entropy_coeff: 0.009999999999999998
          kl: 2.1679738336139254
          policy_loss: -0.11321487170126703
          total_loss: 33217972952.177776
          vf_explained_var: -0.11433596163988113
          vf_loss: 33217972952.177776
    num_agent_steps_sampled: 801000
    num_agent_steps_trained: 801000
    num_steps_sampled: 801000
    num_steps_trained: 801000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,801,14109.1,801000,-4.1908,-1.65,-9.48,411.87




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 802000
  custom_metrics: {}
  date: 2021-10-23_23-55-50
  done: false
  episode_len_mean: 410.82
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.268199999999958
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 3
  episodes_total: 1114
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001469471198421589
          cur_lr: 5.000000000000001e-05
          entropy: 1.3955842402246263
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.27319289098183314
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 13774382648.88889
    num_agent_steps_sampled: 802000
    num_agent_steps_trained: 802000
    num_steps_sampled: 802000
    num_steps_trained: 802000
  iterations_since_restore: 802
  node_ip: 172.17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,802,14142.8,802000,-4.2682,-1.65,-9.48,410.82




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 803000
  custom_metrics: {}
  date: 2021-10-23_23-56-22
  done: false
  episode_len_mean: 411.32
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.2731999999999575
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 2
  episodes_total: 1116
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00022042067976323844
          cur_lr: 5.000000000000001e-05
          entropy: 1.4191766712400649
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.023708134475681517
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 2922902005.3333335
    num_agent_steps_sampled: 803000
    num_agent_steps_trained: 803000
    num_steps_sampled: 803000
    num_steps_trained: 803000
  iterations_since_restore: 803
  node_ip: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,803,14175.4,803000,-4.2732,-1.65,-9.48,411.32




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 804000
  custom_metrics: {}
  date: 2021-10-23_23-56-52
  done: false
  episode_len_mean: 409.01
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.288499999999958
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 3
  episodes_total: 1119
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00033063101964485753
          cur_lr: 5.000000000000001e-05
          entropy: 1.7368894471062555
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0071193940937519075
          total_loss: .inf
          vf_explained_var: -0.48289602994918823
          vf_loss: 16565690030.222221
    num_agent_steps_sampled: 804000
    num_agent_steps_trained: 804000
    num_steps_sampled: 804000
    num_steps_trained: 804000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,804,14205.3,804000,-4.2885,-1.65,-9.48,409.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 805000
  custom_metrics: {}
  date: 2021-10-23_23-57-11
  done: false
  episode_len_mean: 414.26
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.340999999999957
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 1
  episodes_total: 1120
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004959465294672865
          cur_lr: 5.000000000000001e-05
          entropy: 0.8441053410371144
          entropy_coeff: 0.009999999999999998
          kl: 1.5860787206225926
          policy_loss: -0.060063716106944616
          total_loss: 217018202885.6889
          vf_explained_var: 0.09804271161556244
          vf_loss: 217018202885.6889
    num_agent_steps_sampled: 805000
    num_agent_steps_trained: 805000
    num_steps_sampled: 805000
    num_steps_trained: 805000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,805,14223.9,805000,-4.341,-1.65,-9.48,414.26




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 806000
  custom_metrics: {}
  date: 2021-10-23_23-57-39
  done: false
  episode_len_mean: 416.97
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.396199999999956
  episode_reward_min: -9.479999999999842
  episodes_this_iter: 2
  episodes_total: 1122
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007439197942009295
          cur_lr: 5.000000000000001e-05
          entropy: 1.288937791188558
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.016101419377244183
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 15585792802.133333
    num_agent_steps_sampled: 806000
    num_agent_steps_trained: 806000
    num_steps_sampled: 806000
    num_steps_trained: 806000
  iterations_since_restore: 806
  node_ip: 172.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,806,14251.6,806000,-4.3962,-1.65,-9.48,416.97




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 807000
  custom_metrics: {}
  date: 2021-10-23_23-58-06
  done: false
  episode_len_mean: 419.78
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.477199999999956
  episode_reward_min: -10.979999999999942
  episodes_this_iter: 2
  episodes_total: 1124
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0011158796913013946
          cur_lr: 5.000000000000001e-05
          entropy: 1.4753319289949205
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.04327733042753405
          total_loss: .inf
          vf_explained_var: -0.5524186491966248
          vf_loss: 8655079810.844444
    num_agent_steps_sampled: 807000
    num_agent_steps_trained: 807000
    num_steps_sampled: 807000
    num_steps_trained: 807000
  iterations_since_restore: 807


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,807,14278.7,807000,-4.4772,-1.65,-10.98,419.78




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 808000
  custom_metrics: {}
  date: 2021-10-23_23-58-29
  done: false
  episode_len_mean: 420.46
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.4839999999999565
  episode_reward_min: -10.979999999999942
  episodes_this_iter: 2
  episodes_total: 1126
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0016738195369520915
          cur_lr: 5.000000000000001e-05
          entropy: 1.2882365981737773
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.13931758320993848
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 9381579585.422222
    num_agent_steps_sampled: 808000
    num_agent_steps_trained: 808000
    num_steps_sampled: 808000
    num_steps_trained: 808000
  iterations_since_restore: 808
  node_ip: 172.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,808,14302.2,808000,-4.484,-1.65,-10.98,420.46




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 809000
  custom_metrics: {}
  date: 2021-10-23_23-58-48
  done: false
  episode_len_mean: 425.6
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.535399999999955
  episode_reward_min: -10.979999999999942
  episodes_this_iter: 1
  episodes_total: 1127
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0025107293054281374
          cur_lr: 5.000000000000001e-05
          entropy: 1.0653098675939772
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.14230944712956747
          total_loss: .inf
          vf_explained_var: -0.6528024077415466
          vf_loss: 239632088985.6
    num_agent_steps_sampled: 809000
    num_agent_steps_trained: 809000
    num_steps_sampled: 809000
    num_steps_trained: 809000
  iterations_since_restore: 809
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,809,14320.4,809000,-4.5354,-1.65,-10.98,425.6




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 810000
  custom_metrics: {}
  date: 2021-10-23_23-59-14
  done: false
  episode_len_mean: 429.78
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.801599999999954
  episode_reward_min: -19.63000000000004
  episodes_this_iter: 2
  episodes_total: 1129
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003766093958142205
          cur_lr: 5.000000000000001e-05
          entropy: 1.3704698668585884
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.01581020024087694
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 8322647042.844444
    num_agent_steps_sampled: 810000
    num_agent_steps_trained: 810000
    num_steps_sampled: 810000
    num_steps_trained: 810000
  iterations_since_restore: 810
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,810,14346.9,810000,-4.8016,-1.65,-19.63,429.78


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 811000
  custom_metrics: {}
  date: 2021-10-23_23-59-29
  done: false
  episode_len_mean: 431.81
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -4.819299999999954
  episode_reward_min: -19.63000000000004
  episodes_this_iter: 1
  episodes_total: 1130
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.005649140937213312
          cur_lr: 5.000000000000001e-05
          entropy: 0.6970818910333846
          entropy_coeff: 0.009999999999999998
          kl: 1.5392871485816109
          policy_loss: -0.07292612923516167
          total_loss: 76853071689.95555
          vf_explained_var: -0.4836845099925995
          vf_loss: 76853071689.95555
    num_agent_steps_sampled: 811000
    num_agent_steps_trained: 811000
    num_steps_sampled: 811000
    num_steps_trained: 811000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,811,14362,811000,-4.8193,-1.65,-19.63,431.81




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 812000
  custom_metrics: {}
  date: 2021-10-23_23-59-54
  done: false
  episode_len_mean: 440.59
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -5.022399999999952
  episode_reward_min: -19.63000000000004
  episodes_this_iter: 3
  episodes_total: 1133
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.008473711405819961
          cur_lr: 5.000000000000001e-05
          entropy: 1.3604498863220216
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.017731121099657483
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 7634953420.8
    num_agent_steps_sampled: 812000
    num_agent_steps_trained: 812000
    num_steps_sampled: 812000
    num_steps_trained: 812000
  iterations_since_restore: 812
  node_ip: 172.17.0.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,812,14386.4,812000,-5.0224,-1.65,-19.63,440.59




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 813000
  custom_metrics: {}
  date: 2021-10-24_00-00-14
  done: false
  episode_len_mean: 439.6
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -5.042199999999953
  episode_reward_min: -19.63000000000004
  episodes_this_iter: 1
  episodes_total: 1134
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.012710567108729946
          cur_lr: 5.000000000000001e-05
          entropy: 1.4307313256793552
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.5291158176130719
          total_loss: .inf
          vf_explained_var: -0.5284019708633423
          vf_loss: 43577780480.0
    num_agent_steps_sampled: 813000
    num_agent_steps_trained: 813000
    num_steps_sampled: 813000
    num_steps_trained: 813000
  iterations_since_restore: 813
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,813,14407,813000,-5.0422,-1.65,-19.63,439.6




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 814000
  custom_metrics: {}
  date: 2021-10-24_00-00-48
  done: false
  episode_len_mean: 443.92
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -5.2406999999999515
  episode_reward_min: -19.63000000000004
  episodes_this_iter: 3
  episodes_total: 1137
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01906585066309491
          cur_lr: 5.000000000000001e-05
          entropy: 1.2572380211618213
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.005590990848011441
          total_loss: .inf
          vf_explained_var: -0.5869153738021851
          vf_loss: 15925752996.977777
    num_agent_steps_sampled: 814000
    num_agent_steps_trained: 814000
    num_steps_sampled: 814000
    num_steps_trained: 814000
  iterations_since_restore: 814


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,814,14440.2,814000,-5.2407,-1.65,-19.63,443.92




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 815000
  custom_metrics: {}
  date: 2021-10-24_00-01-08
  done: false
  episode_len_mean: 445.08
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -5.3401999999999505
  episode_reward_min: -19.63000000000004
  episodes_this_iter: 1
  episodes_total: 1138
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.028598775994642375
          cur_lr: 5.000000000000001e-05
          entropy: 1.0877193093299866
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.031077622373898824
          total_loss: .inf
          vf_explained_var: -0.7720062136650085
          vf_loss: 21245835667.91111
    num_agent_steps_sampled: 815000
    num_agent_steps_trained: 815000
    num_steps_sampled: 815000
    num_steps_trained: 815000
  iterations_since_restore: 815
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,815,14460.4,815000,-5.3402,-1.65,-19.63,445.08




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 816000
  custom_metrics: {}
  date: 2021-10-24_00-01-56
  done: false
  episode_len_mean: 453.48
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -5.5284999999999505
  episode_reward_min: -19.63000000000004
  episodes_this_iter: 3
  episodes_total: 1141
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.042898163991963556
          cur_lr: 5.000000000000001e-05
          entropy: 1.1860370894273122
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.02031829572386212
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 5107610319.644444
    num_agent_steps_sampled: 816000
    num_agent_steps_trained: 816000
    num_steps_sampled: 816000
    num_steps_trained: 816000
  iterations_since_restore: 816
  node_ip: 172.17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,816,14508.1,816000,-5.5285,-1.65,-19.63,453.48


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 817000
  custom_metrics: {}
  date: 2021-10-24_00-02-11
  done: false
  episode_len_mean: 459.46
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -5.58829999999995
  episode_reward_min: -19.63000000000004
  episodes_this_iter: 1
  episodes_total: 1142
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.06434724598794532
          cur_lr: 5.000000000000001e-05
          entropy: 0.9899586293432447
          entropy_coeff: 0.009999999999999998
          kl: 0.6778829666475455
          policy_loss: -0.07488388005230162
          total_loss: 34778913723.73333
          vf_explained_var: -0.4860202670097351
          vf_loss: 34778913723.73333
    num_agent_steps_sampled: 817000
    num_agent_steps_trained: 817000
    num_steps_sampled: 817000
    num_steps_trained: 817000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,817,14524,817000,-5.5883,-1.65,-19.63,459.46




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 818000
  custom_metrics: {}
  date: 2021-10-24_00-02-46
  done: false
  episode_len_mean: 457.97
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -5.774999999999949
  episode_reward_min: -19.63000000000004
  episodes_this_iter: 3
  episodes_total: 1145
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.096520868981918
          cur_lr: 5.000000000000001e-05
          entropy: 1.688225475947062
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.012157636301385031
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 6780512716.8
    num_agent_steps_sampled: 818000
    num_agent_steps_trained: 818000
    num_steps_sampled: 818000
    num_steps_trained: 818000
  iterations_since_restore: 818
  node_ip: 172.17.0.2
  nu

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,818,14558.8,818000,-5.775,-1.65,-19.63,457.97




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 819000
  custom_metrics: {}
  date: 2021-10-24_00-03-15
  done: false
  episode_len_mean: 462.59
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -5.937499999999948
  episode_reward_min: -19.63000000000004
  episodes_this_iter: 2
  episodes_total: 1147
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.14478130347287702
          cur_lr: 5.000000000000001e-05
          entropy: 1.7314195288552179
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.07251248814993434
          total_loss: .inf
          vf_explained_var: -0.22633908689022064
          vf_loss: 17044743367.11111
    num_agent_steps_sampled: 819000
    num_agent_steps_trained: 819000
    num_steps_sampled: 819000
    num_steps_trained: 819000
  iterations_since_restore: 819
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,819,14587.3,819000,-5.9375,-1.65,-19.63,462.59




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 820000
  custom_metrics: {}
  date: 2021-10-24_00-03-47
  done: false
  episode_len_mean: 465.46
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -6.220599999999947
  episode_reward_min: -21.770000000000113
  episodes_this_iter: 2
  episodes_total: 1149
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2171719552093155
          cur_lr: 5.000000000000001e-05
          entropy: 1.591883169280158
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.06918722987174988
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 2676731965.5111113
    num_agent_steps_sampled: 820000
    num_agent_steps_trained: 820000
    num_steps_sampled: 820000
    num_steps_trained: 820000
  iterations_since_restore: 820
  node_ip: 172.17.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,820,14619.1,820000,-6.2206,-1.65,-21.77,465.46




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 821000
  custom_metrics: {}
  date: 2021-10-24_00-04-04
  done: false
  episode_len_mean: 469.37
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -6.395399999999949
  episode_reward_min: -21.770000000000113
  episodes_this_iter: 2
  episodes_total: 1151
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.32575793281397325
          cur_lr: 5.000000000000001e-05
          entropy: 1.014537486102846
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0015300685746802223
          total_loss: .inf
          vf_explained_var: -0.753906786441803
          vf_loss: 8830925727.288889
    num_agent_steps_sampled: 821000
    num_agent_steps_trained: 821000
    num_steps_sampled: 821000
    num_steps_trained: 821000
  iterations_since_restore: 821
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,821,14636.5,821000,-6.3954,-1.65,-21.77,469.37




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 822000
  custom_metrics: {}
  date: 2021-10-24_00-04-26
  done: false
  episode_len_mean: 473.13
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -6.432999999999948
  episode_reward_min: -21.770000000000113
  episodes_this_iter: 1
  episodes_total: 1152
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.4886368992209598
          cur_lr: 5.000000000000001e-05
          entropy: 1.1067628860473633
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.04763843715190887
          total_loss: .inf
          vf_explained_var: -0.04874945804476738
          vf_loss: 18806071864.88889
    num_agent_steps_sampled: 822000
    num_agent_steps_trained: 822000
    num_steps_sampled: 822000
    num_steps_trained: 822000
  iterations_since_restore: 822
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,822,14658.4,822000,-6.433,-1.65,-21.77,473.13




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 823000
  custom_metrics: {}
  date: 2021-10-24_00-04-50
  done: false
  episode_len_mean: 478.32
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -6.632699999999948
  episode_reward_min: -21.770000000000113
  episodes_this_iter: 2
  episodes_total: 1154
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7329553488314399
          cur_lr: 5.000000000000001e-05
          entropy: 1.104301999674903
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.006422478126155005
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 5510502311.822222
    num_agent_steps_sampled: 823000
    num_agent_steps_trained: 823000
    num_steps_sampled: 823000
    num_steps_trained: 823000
  iterations_since_restore: 823
  node_ip: 172.17.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,823,14682,823000,-6.6327,-1.65,-21.77,478.32




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 824000
  custom_metrics: {}
  date: 2021-10-24_00-05-06
  done: false
  episode_len_mean: 481.37
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -6.87989999999995
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 1
  episodes_total: 1155
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0994330232471599
          cur_lr: 5.000000000000001e-05
          entropy: 0.4926287339793311
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.023289534284008873
          total_loss: .inf
          vf_explained_var: -0.8160877823829651
          vf_loss: 46619768649.95556
    num_agent_steps_sampled: 824000
    num_agent_steps_trained: 824000
    num_steps_sampled: 824000
    num_steps_trained: 824000
  iterations_since_restore: 824
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,824,14698.4,824000,-6.8799,-1.65,-27.46,481.37




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 825000
  custom_metrics: {}
  date: 2021-10-24_00-05-21
  done: false
  episode_len_mean: 488.54
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -6.951599999999948
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 1
  episodes_total: 1156
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6491495348707395
          cur_lr: 5.000000000000001e-05
          entropy: 0.32029993815554514
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.07863508115212123
          total_loss: .inf
          vf_explained_var: -0.6134464144706726
          vf_loss: 40507819963.73333
    num_agent_steps_sampled: 825000
    num_agent_steps_trained: 825000
    num_steps_sampled: 825000
    num_steps_trained: 825000
  iterations_since_restore: 825
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,825,14713.7,825000,-6.9516,-1.65,-27.46,488.54




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 826000
  custom_metrics: {}
  date: 2021-10-24_00-05-39
  done: false
  episode_len_mean: 499.03
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -7.0564999999999465
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 2
  episodes_total: 1158
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4737243023061093
          cur_lr: 5.000000000000001e-05
          entropy: 0.38375119533803725
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.054272706972228156
          total_loss: .inf
          vf_explained_var: -0.4828876554965973
          vf_loss: 27454192822.044445
    num_agent_steps_sampled: 826000
    num_agent_steps_trained: 826000
    num_steps_sampled: 826000
    num_steps_trained: 826000
  iterations_since_restore: 826

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,826,14730.8,826000,-7.0565,-1.65,-27.46,499.03




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 827000
  custom_metrics: {}
  date: 2021-10-24_00-05-53
  done: false
  episode_len_mean: 501.3
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -7.079199999999947
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 1
  episodes_total: 1159
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7105864534591637
          cur_lr: 5.000000000000001e-05
          entropy: 0.49235497381952076
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.07294718283745978
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 3451303608.888889
    num_agent_steps_sampled: 827000
    num_agent_steps_trained: 827000
    num_steps_sampled: 827000
    num_steps_trained: 827000
  iterations_since_restore: 827
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,827,14745.3,827000,-7.0792,-1.65,-27.46,501.3




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 828000
  custom_metrics: {}
  date: 2021-10-24_00-06-10
  done: false
  episode_len_mean: 504.75
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -7.113699999999946
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 1
  episodes_total: 1160
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.565879680188744
          cur_lr: 5.000000000000001e-05
          entropy: 0.5469170113404592
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.009032778214249346
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 1386589879.4666667
    num_agent_steps_sampled: 828000
    num_agent_steps_trained: 828000
    num_steps_sampled: 828000
    num_steps_trained: 828000
  iterations_since_restore: 828
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,828,14762,828000,-7.1137,-1.65,-27.46,504.75




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 829000
  custom_metrics: {}
  date: 2021-10-24_00-06-25
  done: false
  episode_len_mean: 506.87
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -7.134899999999946
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 1
  episodes_total: 1161
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.348819520283115
          cur_lr: 5.000000000000001e-05
          entropy: 0.5106139911545647
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.010086967713303037
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 5570949417.244445
    num_agent_steps_sampled: 829000
    num_agent_steps_trained: 829000
    num_steps_sampled: 829000
    num_steps_trained: 829000
  iterations_since_restore: 829
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,829,14776.9,829000,-7.1349,-1.65,-27.46,506.87




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 830000
  custom_metrics: {}
  date: 2021-10-24_00-06-41
  done: false
  episode_len_mean: 518.94
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -7.255599999999942
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 2
  episodes_total: 1163
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 12.523229280424676
          cur_lr: 5.000000000000001e-05
          entropy: 0.7025900138749017
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.006939580705430773
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 2949106515.5555553
    num_agent_steps_sampled: 830000
    num_agent_steps_trained: 830000
    num_steps_sampled: 830000
    num_steps_trained: 830000
  iterations_since_restore: 830
  node_ip: 172.17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,830,14792.6,830000,-7.2556,-1.65,-27.46,518.94




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 831000
  custom_metrics: {}
  date: 2021-10-24_00-07-35
  done: false
  episode_len_mean: 512.91
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -7.195299999999943
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 4
  episodes_total: 1167
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 18.784843920637012
          cur_lr: 5.000000000000001e-05
          entropy: 0.2763393530001243
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.11285617436385817
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 1255629792.0
    num_agent_steps_sampled: 831000
    num_agent_steps_trained: 831000
    num_steps_sampled: 831000
    num_steps_trained: 831000
  iterations_since_restore: 831
  node_ip: 172.17.0.2
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,831,14847.3,831000,-7.1953,-1.65,-27.46,512.91




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 832000
  custom_metrics: {}
  date: 2021-10-24_00-08-54
  done: false
  episode_len_mean: 496.53
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -7.031499999999946
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1172
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 28.177265880955524
          cur_lr: 5.000000000000001e-05
          entropy: 0.5553210457166036
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.14984604583846198
          total_loss: .inf
          vf_explained_var: -0.8671652674674988
          vf_loss: 281409592.8888889
    num_agent_steps_sampled: 832000
    num_agent_steps_trained: 832000
    num_steps_sampled: 832000
    num_steps_trained: 832000
  iterations_since_restore: 832
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,832,14926.3,832000,-7.0315,-1.65,-27.46,496.53




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 833000
  custom_metrics: {}
  date: 2021-10-24_00-09-54
  done: false
  episode_len_mean: 484.49
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -6.911099999999949
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1177
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 42.265898821433275
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.031348576148351036
          total_loss: .inf
          vf_explained_var: 5.960464477539063e-08
          vf_loss: 265387424.0
    num_agent_steps_sampled: 833000
    num_agent_steps_trained: 833000
    num_steps_sampled: 833000
    num_steps_trained: 833000
  iterations_since_restore: 833
  node_ip: 172.17.0.2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,833,14986.1,833000,-6.9111,-1.65,-27.46,484.49


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 834000
  custom_metrics: {}
  date: 2021-10-24_00-10-52
  done: false
  episode_len_mean: 469.19
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -6.758099999999952
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1182
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 63.39884823214992
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031348605950673424
          total_loss: 104765933.33333333
          vf_explained_var: 9.934107225717526e-08
          vf_loss: 104765933.33333333
    num_agent_steps_sampled: 834000
    num_agent_steps_trained: 834000
    num_steps_sampled: 834000
    num_steps_trained: 834000
  iterations_since_restore: 834
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,834,15044.4,834000,-6.7581,-1.65,-27.46,469.19


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 835000
  custom_metrics: {}
  date: 2021-10-24_00-11-52
  done: false
  episode_len_mean: 454.26
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -6.608799999999956
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1187
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 31.69942411607496
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03134877731402715
          total_loss: 104768376.97777778
          vf_explained_var: 2.2517310682701464e-08
          vf_loss: 104768376.8888889
    num_agent_steps_sampled: 835000
    num_agent_steps_trained: 835000
    num_steps_sampled: 835000
    num_steps_trained: 835000
  iterations_since_restore: 835
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,835,15104.3,835000,-6.6088,-1.65,-27.46,454.26


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 836000
  custom_metrics: {}
  date: 2021-10-24_00-12-48
  done: false
  episode_len_mean: 438.45
  episode_media: {}
  episode_reward_max: -1.6499999999999688
  episode_reward_mean: -6.450699999999959
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1192
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 15.84971205803748
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03134875992933909
          total_loss: 104222181.95555556
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 104222181.95555556
    num_agent_steps_sampled: 836000
    num_agent_steps_trained: 836000
    num_steps_sampled: 836000
    num_steps_trained: 836000
  iterations_since_restore: 836
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,836,15159.4,836000,-6.4507,-1.65,-27.46,438.45


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 837000
  custom_metrics: {}
  date: 2021-10-24_00-13-47
  done: false
  episode_len_mean: 422.2
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -6.309099999999961
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1197
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.92485602901874
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03134868790705999
          total_loss: 103731381.86666666
          vf_explained_var: -3.8411883451772155e-08
          vf_loss: 103731381.77777778
    num_agent_steps_sampled: 837000
    num_agent_steps_trained: 837000
    num_steps_sampled: 837000
    num_steps_trained: 837000
  iterations_since_restore: 837
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,837,15218.8,837000,-6.3091,-1.73,-27.46,422.2




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 838000
  custom_metrics: {}
  date: 2021-10-24_00-15-01
  done: false
  episode_len_mean: 411.08
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -6.160799999999963
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1202
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.96242801450937
          cur_lr: 5.000000000000001e-05
          entropy: 0.32037854161527424
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.11201392014821371
          total_loss: .inf
          vf_explained_var: 0.3934871256351471
          vf_loss: 13554986960.533333
    num_agent_steps_sampled: 838000
    num_agent_steps_trained: 838000
    num_steps_sampled: 838000
    num_steps_trained: 838000
  iterations_since_restore: 838
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,838,15293.4,838000,-6.1608,-1.73,-27.46,411.08


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 839000
  custom_metrics: {}
  date: 2021-10-24_00-16-01
  done: false
  episode_len_mean: 399.93
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -6.049299999999967
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1207
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.943642021764055
          cur_lr: 5.000000000000001e-05
          entropy: 7.222189523242724e-41
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031906952460606895
          total_loss: 543105693.8666667
          vf_explained_var: 8.742014756535355e-08
          vf_loss: 543105693.8666667
    num_agent_steps_sampled: 839000
    num_agent_steps_trained: 839000
    num_steps_sampled: 839000
    num_steps_trained: 839000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,839,15352.7,839000,-6.0493,-1.73,-27.46,399.93


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 840000
  custom_metrics: {}
  date: 2021-10-24_00-17-00
  done: false
  episode_len_mean: 386.84
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -5.837099999999968
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1212
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9718210108820275
          cur_lr: 5.000000000000001e-05
          entropy: 7.506413333763432e-41
          entropy_coeff: 0.009999999999999998
          kl: -1.8445758785395674e-43
          policy_loss: -0.03190666933854421
          total_loss: 109357717.33333333
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 109357717.33333333
    num_agent_steps_sampled: 840000
    num_agent_steps_trained: 840000
    num_steps_sampled: 840000
    num_steps_trained: 84

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,840,15412.1,840000,-5.8371,-1.73,-27.46,386.84


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 841000
  custom_metrics: {}
  date: 2021-10-24_00-18-00
  done: false
  episode_len_mean: 377.41
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -5.680299999999971
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1217
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4859105054410138
          cur_lr: 5.000000000000001e-05
          entropy: 7.854017873825552e-41
          entropy_coeff: 0.009999999999999998
          kl: -1.9602608517610497e-44
          policy_loss: -0.03190664698680242
          total_loss: 108583636.35555555
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 108583636.26666667
    num_agent_steps_sampled: 841000
    num_agent_steps_trained: 841000
    num_steps_sampled: 841000
    num_steps_trained: 84

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,841,15471.5,841000,-5.6803,-1.73,-27.46,377.41


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 842000
  custom_metrics: {}
  date: 2021-10-24_00-18-53
  done: false
  episode_len_mean: 362.17
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -5.461399999999974
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1222
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.7429552527205069
          cur_lr: 5.000000000000001e-05
          entropy: 8.397720120985288e-41
          entropy_coeff: 0.009999999999999998
          kl: -1.9836158261664632e-44
          policy_loss: -0.031906756261984505
          total_loss: 108383957.95555556
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 108383957.95555556
    num_agent_steps_sampled: 842000
    num_agent_steps_trained: 842000
    num_steps_sampled: 842000
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,842,15525.2,842000,-5.4614,-1.73,-27.46,362.17


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 843000
  custom_metrics: {}
  date: 2021-10-24_00-19-53
  done: false
  episode_len_mean: 344.22
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -5.228999999999976
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1227
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.37147762636025344
          cur_lr: 5.000000000000001e-05
          entropy: 9.007143266121857e-41
          entropy_coeff: 0.009999999999999998
          kl: -2.335497440541362e-44
          policy_loss: -0.03190680344899496
          total_loss: 108178002.4
          vf_explained_var: 5.960464477539063e-08
          vf_loss: 108178002.31111111
    num_agent_steps_sampled: 843000
    num_agent_steps_trained: 843000
    num_steps_sampled: 843000
    num_steps_trained: 843000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,843,15584.8,843000,-5.229,-1.73,-27.46,344.22




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 844000
  custom_metrics: {}
  date: 2021-10-24_00-21-12
  done: false
  episode_len_mean: 324.05
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -4.71869999999998
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1232
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.18573881318012672
          cur_lr: 5.000000000000001e-05
          entropy: 0.5447751866446601
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.05114160676797231
          total_loss: .inf
          vf_explained_var: 0.4150964021682739
          vf_loss: 178392600667.9111
    num_agent_steps_sampled: 844000
    num_agent_steps_trained: 844000
    num_steps_sampled: 844000
    num_steps_trained: 844000
  iterations_since_restore: 844
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,844,15664.1,844000,-4.7187,-1.73,-27.46,324.05




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 845000
  custom_metrics: {}
  date: 2021-10-24_00-22-07
  done: false
  episode_len_mean: 311.19
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -4.376599999999983
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1237
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2786082197701901
          cur_lr: 5.000000000000001e-05
          entropy: 0.41466402974393635
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.1421466838568449
          total_loss: .inf
          vf_explained_var: 0.4442186653614044
          vf_loss: 230784183045.6889
    num_agent_steps_sampled: 845000
    num_agent_steps_trained: 845000
    num_steps_sampled: 845000
    num_steps_trained: 845000
  iterations_since_restore: 845
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,845,15718.7,845000,-4.3766,-1.73,-27.46,311.19


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 846000
  custom_metrics: {}
  date: 2021-10-24_00-23-03
  done: false
  episode_len_mean: 299.75
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -4.069999999999984
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 4
  episodes_total: 1241
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.41791232965528513
          cur_lr: 5.000000000000001e-05
          entropy: 0.42320043566740223
          entropy_coeff: 0.009999999999999998
          kl: 0.17201600680189122
          policy_loss: -0.13057411147488487
          total_loss: 18226015482.31111
          vf_explained_var: -0.30098000168800354
          vf_loss: 18226015482.31111
    num_agent_steps_sampled: 846000
    num_agent_steps_trained: 846000
    num_steps_sampled: 846000
    num_steps_trained: 846000
  it

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,846,15774.2,846000,-4.07,-1.73,-27.46,299.75




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 847000
  custom_metrics: {}
  date: 2021-10-24_00-23-51
  done: false
  episode_len_mean: 288.57
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -3.7565999999999873
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 4
  episodes_total: 1245
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.6268684944829276
          cur_lr: 5.000000000000001e-05
          entropy: 0.471103198826313
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.06585387074285083
          total_loss: .inf
          vf_explained_var: -0.5235679745674133
          vf_loss: 27137382889.244446
    num_agent_steps_sampled: 847000
    num_agent_steps_trained: 847000
    num_steps_sampled: 847000
    num_steps_trained: 847000
  iterations_since_restore: 847
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,847,15822.3,847000,-3.7566,-1.73,-27.46,288.57




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 848000
  custom_metrics: {}
  date: 2021-10-24_00-24-38
  done: false
  episode_len_mean: 281.0
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -3.3101999999999867
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 4
  episodes_total: 1249
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.9403027417243914
          cur_lr: 5.000000000000001e-05
          entropy: 0.7206898639599483
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.05723263555102878
          total_loss: .inf
          vf_explained_var: -0.3797298073768616
          vf_loss: 20202688716.8
    num_agent_steps_sampled: 848000
    num_agent_steps_trained: 848000
    num_steps_sampled: 848000
    num_steps_trained: 848000
  iterations_since_restore: 848
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,848,15869.8,848000,-3.3102,-1.73,-27.46,281


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 849000
  custom_metrics: {}
  date: 2021-10-24_00-25-37
  done: false
  episode_len_mean: 261.58
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.8324999999999902
  episode_reward_min: -27.460000000000186
  episodes_this_iter: 5
  episodes_total: 1254
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.410454112586587
          cur_lr: 5.000000000000001e-05
          entropy: 0.31568006670826837
          entropy_coeff: 0.009999999999999998
          kl: 0.12293804692389029
          policy_loss: 0.03468610242837005
          total_loss: 54216097382.4
          vf_explained_var: -0.2476644515991211
          vf_loss: 54216097382.4
    num_agent_steps_sampled: 849000
    num_agent_steps_trained: 849000
    num_steps_sampled: 849000
    num_steps_trained: 849000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,849,15928.1,849000,-2.8325,-1.73,-27.46,261.58




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 850000
  custom_metrics: {}
  date: 2021-10-24_00-26-27
  done: false
  episode_len_mean: 239.32
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.3931999999999927
  episode_reward_min: -9.719999999999837
  episodes_this_iter: 4
  episodes_total: 1258
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.115681168879881
          cur_lr: 5.000000000000001e-05
          entropy: 0.5574384914504157
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.22853036787774827
          total_loss: .inf
          vf_explained_var: -0.4281940460205078
          vf_loss: 20580911047.11111
    num_agent_steps_sampled: 850000
    num_agent_steps_trained: 850000
    num_steps_sampled: 850000
    num_steps_trained: 850000
  iterations_since_restore: 850
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,850,15978.3,850000,-2.3932,-1.73,-9.72,239.32




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 851000
  custom_metrics: {}
  date: 2021-10-24_00-27-39
  done: false
  episode_len_mean: 208.62
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0861999999999985
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 1263
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1735217533198226
          cur_lr: 5.000000000000001e-05
          entropy: 0.8436420891020033
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.15986457897557152
          total_loss: .inf
          vf_explained_var: -0.45818015933036804
          vf_loss: 20455836472.88889
    num_agent_steps_sampled: 851000
    num_agent_steps_trained: 851000
    num_steps_sampled: 851000
    num_steps_trained: 851000
  iterations_since_restore: 851
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,851,16050.9,851000,-2.0862,-1.73,-3.04,208.62




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 852000
  custom_metrics: {}
  date: 2021-10-24_00-28-32
  done: false
  episode_len_mean: 208.84
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0883999999999996
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 1268
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.760282629979733
          cur_lr: 5.000000000000001e-05
          entropy: 0.4847406400574578
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.07389106050961547
          total_loss: .inf
          vf_explained_var: -0.009706281125545502
          vf_loss: 20312288221.866665
    num_agent_steps_sampled: 852000
    num_agent_steps_trained: 852000
    num_steps_sampled: 852000
    num_steps_trained: 852000
  iterations_since_restore: 852
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,852,16104,852000,-2.0884,-1.73,-3.04,208.84




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 853000
  custom_metrics: {}
  date: 2021-10-24_00-29-23
  done: false
  episode_len_mean: 210.65
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.1064999999999987
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 1272
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.140423944969599
          cur_lr: 5.000000000000001e-05
          entropy: 0.5824611700243421
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.021479421274529564
          total_loss: .inf
          vf_explained_var: -0.30989038944244385
          vf_loss: 6381288732.444445
    num_agent_steps_sampled: 853000
    num_agent_steps_trained: 853000
    num_steps_sampled: 853000
    num_steps_trained: 853000
  iterations_since_restore: 853
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,853,16154.7,853000,-2.1065,-1.73,-3.04,210.65


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 854000
  custom_metrics: {}
  date: 2021-10-24_00-30-18
  done: false
  episode_len_mean: 211.29
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.1128999999999984
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 1276
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 10.710635917454399
          cur_lr: 5.000000000000001e-05
          entropy: 0.41869927315662303
          entropy_coeff: 0.009999999999999998
          kl: 0.026080302550275856
          policy_loss: -0.06912843270434274
          total_loss: 4661406412.9777775
          vf_explained_var: -0.12723718583583832
          vf_loss: 4661406412.8
    num_agent_steps_sampled: 854000
    num_agent_steps_trained: 854000
    num_steps_sampled: 854000
    num_steps_trained: 854000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,854,16209.3,854000,-2.1129,-1.73,-3.04,211.29




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 855000
  custom_metrics: {}
  date: 2021-10-24_00-31-06
  done: false
  episode_len_mean: 212.82
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.1281999999999983
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 1281
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 16.065953876181595
          cur_lr: 5.000000000000001e-05
          entropy: 0.412493125266499
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.07500282873709997
          total_loss: .inf
          vf_explained_var: -0.4410819709300995
          vf_loss: 8327732843.733334
    num_agent_steps_sampled: 855000
    num_agent_steps_trained: 855000
    num_steps_sampled: 855000
    num_steps_trained: 855000
  iterations_since_restore: 855
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,855,16256.9,855000,-2.1282,-1.73,-3.04,212.82




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 856000
  custom_metrics: {}
  date: 2021-10-24_00-32-03
  done: false
  episode_len_mean: 213.35
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.1334999999999984
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 1286
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 24.0989308142724
          cur_lr: 5.000000000000001e-05
          entropy: 0.3848371497458882
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.1459341088930766
          total_loss: .inf
          vf_explained_var: 0.05123945698142052
          vf_loss: 2750905439.288889
    num_agent_steps_sampled: 856000
    num_agent_steps_trained: 856000
    num_steps_sampled: 856000
    num_steps_trained: 856000
  iterations_since_restore: 856
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,856,16314.8,856000,-2.1335,-1.73,-3.04,213.35




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 857000
  custom_metrics: {}
  date: 2021-10-24_00-33-18
  done: false
  episode_len_mean: 213.61
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.136099999999998
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 1290
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 36.1483962214086
          cur_lr: 5.000000000000001e-05
          entropy: 0.2001023824016253
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.09629964580138524
          total_loss: .inf
          vf_explained_var: -0.5616216659545898
          vf_loss: 6723371673.6
    num_agent_steps_sampled: 857000
    num_agent_steps_trained: 857000
    num_steps_sampled: 857000
    num_steps_trained: 857000
  iterations_since_restore: 857
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,857,16389.2,857000,-2.1361,-1.72,-3.04,213.61




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 858000
  custom_metrics: {}
  date: 2021-10-24_00-34-08
  done: false
  episode_len_mean: 215.33
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.153299999999998
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 1295
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 54.2225943321129
          cur_lr: 5.000000000000001e-05
          entropy: 0.5823153767320844
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0005443152454164293
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 3793654062.9333334
    num_agent_steps_sampled: 858000
    num_agent_steps_trained: 858000
    num_steps_sampled: 858000
    num_steps_trained: 858000
  iterations_since_restore: 858
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,858,16439.4,858000,-2.1533,-1.72,-3.04,215.33




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 859000
  custom_metrics: {}
  date: 2021-10-24_00-35-05
  done: false
  episode_len_mean: 215.58
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.155799999999998
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 1299
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 81.33389149816935
          cur_lr: 5.000000000000001e-05
          entropy: 0.18297760295681656
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0580805543396208
          total_loss: .inf
          vf_explained_var: -0.1462913155555725
          vf_loss: 5144972464.355556
    num_agent_steps_sampled: 859000
    num_agent_steps_trained: 859000
    num_steps_sampled: 859000
    num_steps_trained: 859000
  iterations_since_restore: 859
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,859,16496,859000,-2.1558,-1.72,-3.04,215.58




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 860000
  custom_metrics: {}
  date: 2021-10-24_00-36-00
  done: false
  episode_len_mean: 215.83
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.1582999999999974
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 1304
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 122.00083724725401
          cur_lr: 5.000000000000001e-05
          entropy: 0.22567157808743005
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.03099825746483273
          total_loss: .inf
          vf_explained_var: 0.09225589781999588
          vf_loss: 1951201944.1777778
    num_agent_steps_sampled: 860000
    num_agent_steps_trained: 860000
    num_steps_sampled: 860000
    num_steps_trained: 860000
  iterations_since_restore: 860
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,860,16550.9,860000,-2.1583,-1.72,-3.04,215.83




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 861000
  custom_metrics: {}
  date: 2021-10-24_00-36-50
  done: false
  episode_len_mean: 217.38
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.1737999999999973
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 4
  episodes_total: 1308
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 183.00125587088107
          cur_lr: 5.000000000000001e-05
          entropy: 0.5496903985738755
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.0036476253531873225
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 2488928905.9555554
    num_agent_steps_sampled: 861000
    num_agent_steps_trained: 861000
    num_steps_sampled: 861000
    num_steps_trained: 861000
  iterations_since_restore: 861
  node_ip: 172.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,861,16601,861000,-2.1738,-1.72,-3.04,217.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 862000
  custom_metrics: {}
  date: 2021-10-24_00-37-45
  done: false
  episode_len_mean: 217.7
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.176999999999997
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 1313
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 274.50188380632153
          cur_lr: 5.000000000000001e-05
          entropy: 0.049970850710653596
          entropy_coeff: 0.009999999999999998
          kl: 0.010154987220804972
          policy_loss: -0.0728132420943843
          total_loss: 9743410164.622223
          vf_explained_var: 0.023638185113668442
          vf_loss: 9743410164.622223
    num_agent_steps_sampled: 862000
    num_agent_steps_trained: 862000
    num_steps_sampled: 862000
    num_steps_trained: 862000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,862,16656.2,862000,-2.177,-1.72,-3.04,217.7




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 863000
  custom_metrics: {}
  date: 2021-10-24_00-38-41
  done: false
  episode_len_mean: 218.03
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.1802999999999972
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 1318
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 274.50188380632153
          cur_lr: 5.000000000000001e-05
          entropy: 0.16700056344270706
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.196382647090488
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 12040055378.48889
    num_agent_steps_sampled: 863000
    num_agent_steps_trained: 863000
    num_steps_sampled: 863000
    num_steps_trained: 863000
  iterations_since_restore: 863
  node_ip: 172.17.0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,863,16712.5,863000,-2.1803,-1.72,-3.04,218.03




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 864000
  custom_metrics: {}
  date: 2021-10-24_00-39-57
  done: false
  episode_len_mean: 218.04
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.1803999999999975
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 1323
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 411.75282570948235
          cur_lr: 5.000000000000001e-05
          entropy: 0.24935209850470225
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.06981267829736074
          total_loss: .inf
          vf_explained_var: -0.24654459953308105
          vf_loss: 1120808366.9333334
    num_agent_steps_sampled: 864000
    num_agent_steps_trained: 864000
    num_steps_sampled: 864000
    num_steps_trained: 864000
  iterations_since_restore: 864
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,864,16787.9,864000,-2.1804,-1.72,-3.04,218.04


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 865000
  custom_metrics: {}
  date: 2021-10-24_00-40-56
  done: false
  episode_len_mean: 218.16
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.1815999999999973
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 1328
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 617.6292385642236
          cur_lr: 5.000000000000001e-05
          entropy: 0.04042610383282105
          entropy_coeff: 0.009999999999999998
          kl: 0.0028410816021953956
          policy_loss: -0.13509020114110576
          total_loss: 3175542151.111111
          vf_explained_var: -0.462222158908844
          vf_loss: 3175542148.266667
    num_agent_steps_sampled: 865000
    num_agent_steps_trained: 865000
    num_steps_sampled: 865000
    num_steps_trained: 865000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,865,16847,865000,-2.1816,-1.72,-3.04,218.16




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 866000
  custom_metrics: {}
  date: 2021-10-24_00-41-53
  done: false
  episode_len_mean: 218.26
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.182599999999997
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 1333
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 308.8146192821118
          cur_lr: 5.000000000000001e-05
          entropy: 0.10936291456843415
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.07699195709493425
          total_loss: .inf
          vf_explained_var: -0.29565858840942383
          vf_loss: 2300924360.888889
    num_agent_steps_sampled: 866000
    num_agent_steps_trained: 866000
    num_steps_sampled: 866000
    num_steps_trained: 866000
  iterations_since_restore: 866
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,866,16904.4,866000,-2.1826,-1.72,-3.04,218.26




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 867000
  custom_metrics: {}
  date: 2021-10-24_00-42-52
  done: false
  episode_len_mean: 217.8
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.1779999999999973
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 1338
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 463.2219289231676
          cur_lr: 5.000000000000001e-05
          entropy: 0.06998402691549725
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.015302453148696158
          total_loss: .inf
          vf_explained_var: 0.03656413406133652
          vf_loss: 2184531271.111111
    num_agent_steps_sampled: 867000
    num_agent_steps_trained: 867000
    num_steps_sampled: 867000
    num_steps_trained: 867000
  iterations_since_restore: 867
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,867,16962.8,867000,-2.178,-1.72,-3.04,217.8


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 868000
  custom_metrics: {}
  date: 2021-10-24_00-43-47
  done: false
  episode_len_mean: 216.75
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.1674999999999978
  episode_reward_min: -3.039999999999979
  episodes_this_iter: 5
  episodes_total: 1343
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 694.8328933847514
          cur_lr: 5.000000000000001e-05
          entropy: 0.13459005136456753
          entropy_coeff: 0.009999999999999998
          kl: 0.03243034333798526
          policy_loss: -0.029709501812855404
          total_loss: 332105696.0
          vf_explained_var: -0.2235591858625412
          vf_loss: 332105673.24444443
    num_agent_steps_sampled: 868000
    num_agent_steps_trained: 868000
    num_steps_sampled: 868000
    num_steps_trained: 868000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,868,17017.6,868000,-2.1675,-1.72,-3.04,216.75




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 869000
  custom_metrics: {}
  date: 2021-10-24_00-44-45
  done: false
  episode_len_mean: 214.44
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.144399999999998
  episode_reward_min: -2.9199999999999817
  episodes_this_iter: 5
  episodes_total: 1348
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1042.249340077127
          cur_lr: 5.000000000000001e-05
          entropy: 0.10452383702827825
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.053715413146548804
          total_loss: .inf
          vf_explained_var: -0.960473895072937
          vf_loss: 501542031.34444445
    num_agent_steps_sampled: 869000
    num_agent_steps_trained: 869000
    num_steps_sampled: 869000
    num_steps_trained: 869000
  iterations_since_restore: 869
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,869,17076,869000,-2.1444,-1.72,-2.92,214.44




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 870000
  custom_metrics: {}
  date: 2021-10-24_00-45-59
  done: false
  episode_len_mean: 213.95
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.1394999999999986
  episode_reward_min: -2.9199999999999817
  episodes_this_iter: 4
  episodes_total: 1352
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1563.3740101156905
          cur_lr: 5.000000000000001e-05
          entropy: 0.24557831287384033
          entropy_coeff: 0.009999999999999998
          kl: 1.6681174471146532
          policy_loss: -0.06841603318850199
          total_loss: 478273964.0638889
          vf_explained_var: 0.1625276803970337
          vf_loss: 478271354.8280382
    num_agent_steps_sampled: 870000
    num_agent_steps_trained: 870000
    num_steps_sampled: 870000
    num_steps_trained: 870000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,870,17149.8,870000,-2.1395,-1.72,-2.92,213.95




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 871000
  custom_metrics: {}
  date: 2021-10-24_00-46-55
  done: false
  episode_len_mean: 212.43
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.1242999999999985
  episode_reward_min: -2.619999999999988
  episodes_this_iter: 5
  episodes_total: 1357
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2345.061015173535
          cur_lr: 5.000000000000001e-05
          entropy: 0.31672287268771065
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03290839178694619
          total_loss: .inf
          vf_explained_var: -0.6666669845581055
          vf_loss: 80660716.51032919
    num_agent_steps_sampled: 871000
    num_agent_steps_trained: 871000
    num_steps_sampled: 871000
    num_steps_trained: 871000
  iterations_since_restore: 871
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,871,17206.4,871000,-2.1243,-1.72,-2.62,212.43




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 872000
  custom_metrics: {}
  date: 2021-10-24_00-47-54
  done: false
  episode_len_mean: 212.0
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.1199999999999988
  episode_reward_min: -2.619999999999988
  episodes_this_iter: 5
  episodes_total: 1362
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3517.5915227603027
          cur_lr: 5.000000000000001e-05
          entropy: 0.16212383094761107
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0561465366019143
          total_loss: .inf
          vf_explained_var: -0.2091645747423172
          vf_loss: 793119535.8222222
    num_agent_steps_sampled: 872000
    num_agent_steps_trained: 872000
    num_steps_sampled: 872000
    num_steps_trained: 872000
  iterations_since_restore: 872
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,872,17264.5,872000,-2.12,-1.72,-2.62,212




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 873000
  custom_metrics: {}
  date: 2021-10-24_00-48-51
  done: false
  episode_len_mean: 211.38
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.113799999999999
  episode_reward_min: -2.619999999999988
  episodes_this_iter: 5
  episodes_total: 1367
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5276.387284140454
          cur_lr: 5.000000000000001e-05
          entropy: 0.265468344423506
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.040985854052835044
          total_loss: .inf
          vf_explained_var: -0.6666666865348816
          vf_loss: 313270223.9926585
    num_agent_steps_sampled: 873000
    num_agent_steps_trained: 873000
    num_steps_sampled: 873000
    num_steps_trained: 873000
  iterations_since_restore: 873
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,873,17322.3,873000,-2.1138,-1.72,-2.62,211.38




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 874000
  custom_metrics: {}
  date: 2021-10-24_00-49-45
  done: false
  episode_len_mean: 209.75
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.0974999999999993
  episode_reward_min: -2.5999999999999885
  episodes_this_iter: 5
  episodes_total: 1372
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7914.58092621068
          cur_lr: 5.000000000000001e-05
          entropy: 0.09244114177094566
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.020774945120016732
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 1433933693.1666667
    num_agent_steps_sampled: 874000
    num_agent_steps_trained: 874000
    num_steps_sampled: 874000
    num_steps_trained: 874000
  iterations_since_restore: 874
  node_ip: 172.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,874,17375.8,874000,-2.0975,-1.72,-2.6,209.75




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 875000
  custom_metrics: {}
  date: 2021-10-24_00-50-42
  done: false
  episode_len_mean: 208.69
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.0868999999999995
  episode_reward_min: -2.5599999999999894
  episodes_this_iter: 5
  episodes_total: 1377
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 11871.871389316017
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03191326558589935
          total_loss: .inf
          vf_explained_var: 9.536743306171047e-08
          vf_loss: 956020868.2666667
    num_agent_steps_sampled: 875000
    num_agent_steps_trained: 875000
    num_steps_sampled: 875000
    num_steps_trained: 875000
  iterations_since_restore: 875
  node_ip: 172.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,875,17433,875000,-2.0869,-1.72,-2.56,208.69




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 876000
  custom_metrics: {}
  date: 2021-10-24_00-52-00
  done: false
  episode_len_mean: 207.06
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.0705999999999998
  episode_reward_min: -2.5199999999999902
  episodes_this_iter: 5
  episodes_total: 1382
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 17807.80708397404
          cur_lr: 5.000000000000001e-05
          entropy: 0.056051420213447674
          entropy_coeff: 0.009999999999999998
          kl: 0.018279940039954253
          policy_loss: 0.35324451732966633
          total_loss: 55848942308.62222
          vf_explained_var: 0.0004610048490576446
          vf_loss: 55848942007.82222
    num_agent_steps_sampled: 876000
    num_agent_steps_trained: 876000
    num_steps_sampled: 876000
    num_steps_trained: 876000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,876,17510.8,876000,-2.0706,-1.72,-2.52,207.06




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 877000
  custom_metrics: {}
  date: 2021-10-24_00-52-58
  done: false
  episode_len_mean: 206.43
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.0643
  episode_reward_min: -2.5199999999999902
  episodes_this_iter: 5
  episodes_total: 1387
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 17807.80708397404
          cur_lr: 5.000000000000001e-05
          entropy: 0.0006294789864663163
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.014528247010376718
          total_loss: .inf
          vf_explained_var: -0.33332961797714233
          vf_loss: 1157020207.6444445
    num_agent_steps_sampled: 877000
    num_agent_steps_trained: 877000
    num_steps_sampled: 877000
    num_steps_trained: 877000
  iterations_since_restore: 877
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,877,17568.7,877000,-2.0643,-1.72,-2.52,206.43


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 878000
  custom_metrics: {}
  date: 2021-10-24_00-53-57
  done: false
  episode_len_mean: 206.16
  episode_media: {}
  episode_reward_max: -1.7400000000000013
  episode_reward_mean: -2.0616
  episode_reward_min: -2.5199999999999902
  episodes_this_iter: 5
  episodes_total: 1392
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 26711.710625961052
          cur_lr: 5.000000000000001e-05
          entropy: 0.003662988560740907
          entropy_coeff: 0.009999999999999998
          kl: 0.0009195963541666667
          policy_loss: -0.03183505112926165
          total_loss: 116978815.37777779
          vf_explained_var: 1.3245476937484568e-09
          vf_loss: 116978790.66666667
    num_agent_steps_sampled: 878000
    num_agent_steps_trained: 878000
    num_steps_sampled: 878000
    num_steps_trained: 878000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,878,17628,878000,-2.0616,-1.74,-2.52,206.16


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 879000
  custom_metrics: {}
  date: 2021-10-24_00-54-57
  done: false
  episode_len_mean: 204.69
  episode_media: {}
  episode_reward_max: -1.7400000000000013
  episode_reward_mean: -2.0469
  episode_reward_min: -2.5199999999999902
  episodes_this_iter: 5
  episodes_total: 1397
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 13355.855312980526
          cur_lr: 5.000000000000001e-05
          entropy: 0.002881418122602339
          entropy_coeff: 0.009999999999999998
          kl: 0.0004719111614682525
          policy_loss: -0.03179528456595209
          total_loss: 91797402.48888889
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 91797396.35555555
    num_agent_steps_sampled: 879000
    num_agent_steps_trained: 879000
    num_steps_sampled: 879000
    num_steps_trained: 879000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,879,17687.6,879000,-2.0469,-1.74,-2.52,204.69


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 880000
  custom_metrics: {}
  date: 2021-10-24_00-55-56
  done: false
  episode_len_mean: 204.04
  episode_media: {}
  episode_reward_max: -1.7400000000000013
  episode_reward_mean: -2.0404
  episode_reward_min: -2.5199999999999902
  episodes_this_iter: 5
  episodes_total: 1402
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6677.927656490263
          cur_lr: 5.000000000000001e-05
          entropy: 0.007395416780787005
          entropy_coeff: 0.009999999999999998
          kl: 0.0017691935112278203
          policy_loss: -0.031779904001288946
          total_loss: 90719782.31111111
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 90719770.57777777
    num_agent_steps_sampled: 880000
    num_agent_steps_trained: 880000
    num_steps_sampled: 880000
    num_steps_trained: 880000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,880,17746.5,880000,-2.0404,-1.74,-2.52,204.04


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 881000
  custom_metrics: {}
  date: 2021-10-24_00-56-51
  done: false
  episode_len_mean: 202.85
  episode_media: {}
  episode_reward_max: -1.7400000000000013
  episode_reward_mean: -2.0285
  episode_reward_min: -2.399999999999993
  episodes_this_iter: 5
  episodes_total: 1407
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3338.9638282451315
          cur_lr: 5.000000000000001e-05
          entropy: 0.009579738316934882
          entropy_coeff: 0.009999999999999998
          kl: 0.0023491717676037828
          policy_loss: -0.03163913612564405
          total_loss: 89772426.57777777
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 89772418.84444444
    num_agent_steps_sampled: 881000
    num_agent_steps_trained: 881000
    num_steps_sampled: 881000
    num_steps_trained: 881000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,881,17801.6,881000,-2.0285,-1.74,-2.4,202.85




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 882000
  custom_metrics: {}
  date: 2021-10-24_00-58-05
  done: false
  episode_len_mean: 201.87
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.0187000000000004
  episode_reward_min: -2.399999999999993
  episodes_this_iter: 5
  episodes_total: 1412
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1669.4819141225657
          cur_lr: 5.000000000000001e-05
          entropy: 0.08933841723851654
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.016182459849450322
          total_loss: .inf
          vf_explained_var: -0.29848119616508484
          vf_loss: 160810246.13333333
    num_agent_steps_sampled: 882000
    num_agent_steps_trained: 882000
    num_steps_sampled: 882000
    num_steps_trained: 882000
  iterations_since_restore: 882

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,882,17875.4,882000,-2.0187,-1.72,-2.4,201.87




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 883000
  custom_metrics: {}
  date: 2021-10-24_00-58-57
  done: false
  episode_len_mean: 201.7
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.0170000000000003
  episode_reward_min: -2.399999999999993
  episodes_this_iter: 5
  episodes_total: 1417
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2504.222871183848
          cur_lr: 5.000000000000001e-05
          entropy: 0.17813571481965482
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.031102269432610935
          total_loss: .inf
          vf_explained_var: -0.5841814875602722
          vf_loss: 3790918682630.5776
    num_agent_steps_sampled: 883000
    num_agent_steps_trained: 883000
    num_steps_sampled: 883000
    num_steps_trained: 883000
  iterations_since_restore: 883
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,883,17927.3,883000,-2.017,-1.72,-2.4,201.7




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 884000
  custom_metrics: {}
  date: 2021-10-24_00-59-52
  done: false
  episode_len_mean: 201.8
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.0180000000000002
  episode_reward_min: -2.399999999999993
  episodes_this_iter: 5
  episodes_total: 1422
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3756.3343067757733
          cur_lr: 5.000000000000001e-05
          entropy: 0.05624843256341087
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0703627496957779
          total_loss: .inf
          vf_explained_var: -0.43417981266975403
          vf_loss: 326134900716.08887
    num_agent_steps_sampled: 884000
    num_agent_steps_trained: 884000
    num_steps_sampled: 884000
    num_steps_trained: 884000
  iterations_since_restore: 884
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,884,17982.8,884000,-2.018,-1.72,-2.4,201.8




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 885000
  custom_metrics: {}
  date: 2021-10-24_01-00-51
  done: false
  episode_len_mean: 201.78
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.0178000000000007
  episode_reward_min: -2.399999999999993
  episodes_this_iter: 5
  episodes_total: 1427
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5634.5014601636585
          cur_lr: 5.000000000000001e-05
          entropy: 0.05396203346964386
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.028938074451353817
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 549724500.7333333
    num_agent_steps_sampled: 885000
    num_agent_steps_trained: 885000
    num_steps_sampled: 885000
    num_steps_trained: 885000
  iterations_since_restore: 885
  node_ip: 172.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,885,18041.1,885000,-2.0178,-1.72,-2.4,201.78


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 886000
  custom_metrics: {}
  date: 2021-10-24_01-01-49
  done: false
  episode_len_mean: 201.75
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.017500000000001
  episode_reward_min: -2.399999999999993
  episodes_this_iter: 5
  episodes_total: 1432
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8451.752190245488
          cur_lr: 5.000000000000001e-05
          entropy: 0.08546109632071522
          entropy_coeff: 0.009999999999999998
          kl: 0.03415233042194611
          policy_loss: -0.07435738411214617
          total_loss: 1978500196.9777777
          vf_explained_var: 0.014628966338932514
          vf_loss: 1978499841.4222221
    num_agent_steps_sampled: 886000
    num_agent_steps_trained: 886000
    num_steps_sampled: 886000
    num_steps_trained: 886000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,886,18099.4,886000,-2.0175,-1.72,-2.4,201.75




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 887000
  custom_metrics: {}
  date: 2021-10-24_01-02-39
  done: false
  episode_len_mean: 201.76
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.0176000000000007
  episode_reward_min: -2.399999999999993
  episodes_this_iter: 5
  episodes_total: 1437
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 12677.628285368231
          cur_lr: 5.000000000000001e-05
          entropy: 0.09000580186645189
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.026139429459969204
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 1285138362.7055554
    num_agent_steps_sampled: 887000
    num_agent_steps_trained: 887000
    num_steps_sampled: 887000
    num_steps_trained: 887000
  iterations_since_restore: 887
  node_ip: 172.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,887,18149.5,887000,-2.0176,-1.72,-2.4,201.76




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 888000
  custom_metrics: {}
  date: 2021-10-24_01-03-51
  done: false
  episode_len_mean: 201.51
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.0151000000000003
  episode_reward_min: -2.399999999999993
  episodes_this_iter: 5
  episodes_total: 1442
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 19016.44242805234
          cur_lr: 5.000000000000001e-05
          entropy: 0.1279317354162534
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.008214249089360238
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 333038839.5273224
    num_agent_steps_sampled: 888000
    num_agent_steps_trained: 888000
    num_steps_sampled: 888000
    num_steps_trained: 888000
  iterations_since_restore: 888
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,888,18221.9,888000,-2.0151,-1.72,-2.4,201.51




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 889000
  custom_metrics: {}
  date: 2021-10-24_01-04-46
  done: false
  episode_len_mean: 201.39
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.013900000000001
  episode_reward_min: -2.399999999999993
  episodes_this_iter: 5
  episodes_total: 1447
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 28524.663642078518
          cur_lr: 5.000000000000001e-05
          entropy: 0.03658372287812829
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.029269184875819417
          total_loss: .inf
          vf_explained_var: -0.3333331048488617
          vf_loss: 1058073899.9333333
    num_agent_steps_sampled: 889000
    num_agent_steps_trained: 889000
    num_steps_sampled: 889000
    num_steps_trained: 889000
  iterations_since_restore: 889
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,889,18276.7,889000,-2.0139,-1.72,-2.4,201.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 890000
  custom_metrics: {}
  date: 2021-10-24_01-05-36
  done: false
  episode_len_mean: 200.96
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.0096000000000007
  episode_reward_min: -2.0899999999999994
  episodes_this_iter: 5
  episodes_total: 1452
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 42786.99546311778
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03194869061311086
          total_loss: 1379487611.7333333
          vf_explained_var: -1.7815166586387932e-07
          vf_loss: 1379487611.7333333
    num_agent_steps_sampled: 890000
    num_agent_steps_trained: 890000
    num_steps_sampled: 890000
    num_steps_trained: 890000
  iterations_since_restore: 890


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,890,18326,890000,-2.0096,-1.72,-2.09,200.96


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 891000
  custom_metrics: {}
  date: 2021-10-24_01-06-28
  done: false
  episode_len_mean: 200.67
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.006700000000001
  episode_reward_min: -2.06
  episodes_this_iter: 5
  episodes_total: 1457
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 21393.49773155889
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.0319500466187795
          total_loss: 154933788.26666668
          vf_explained_var: -4.635916894812908e-08
          vf_loss: 154933788.08888888
    num_agent_steps_sampled: 891000
    num_agent_steps_trained: 891000
    num_steps_sampled: 891000
    num_steps_trained: 891000
  iterations_since_restore: 891
  node_ip: 172.17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,891,18378.9,891000,-2.0067,-1.72,-2.06,200.67


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 892000
  custom_metrics: {}
  date: 2021-10-24_01-07-26
  done: false
  episode_len_mean: 200.54
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.0054000000000007
  episode_reward_min: -2.06
  episodes_this_iter: 5
  episodes_total: 1462
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 10696.748865779446
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03194991002480189
          total_loss: 154543468.97777778
          vf_explained_var: 1.033147185580674e-07
          vf_loss: 154543468.8
    num_agent_steps_sampled: 892000
    num_agent_steps_trained: 892000
    num_steps_sampled: 892000
    num_steps_trained: 892000
  iterations_since_restore: 892
  node_ip: 172.17.0.2


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,892,18435.9,892000,-2.0054,-1.72,-2.06,200.54


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 893000
  custom_metrics: {}
  date: 2021-10-24_01-08-16
  done: false
  episode_len_mean: 200.3
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.003000000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1467
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5348.374432889723
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03194995721181234
          total_loss: 153704772.8
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 153704772.62222221
    num_agent_steps_sampled: 893000
    num_agent_steps_trained: 893000
    num_steps_sampled: 893000
    num_steps_trained: 893000
  iterations_since_restore: 893
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,893,18486.2,893000,-2.003,-1.72,-2.05,200.3




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 894000
  custom_metrics: {}
  date: 2021-10-24_01-09-31
  done: false
  episode_len_mean: 199.86
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9986000000000006
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1472
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2674.1872164448614
          cur_lr: 5.000000000000001e-05
          entropy: 0.02781286539102439
          entropy_coeff: 0.009999999999999998
          kl: 0.0039765047323372625
          policy_loss: 0.18450674778885312
          total_loss: 363858620691.91113
          vf_explained_var: 0.14491185545921326
          vf_loss: 363858620691.91113
    num_agent_steps_sampled: 894000
    num_agent_steps_trained: 894000
    num_steps_sampled: 894000
    num_steps_trained: 894000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,894,18560.9,894000,-1.9986,-1.72,-2.05,199.86


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 895000
  custom_metrics: {}
  date: 2021-10-24_01-10-27
  done: false
  episode_len_mean: 199.85
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9985000000000008
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1477
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1337.0936082224307
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03183365116516749
          total_loss: 215765729826.13333
          vf_explained_var: -1.7219120351796846e-08
          vf_loss: 215765729826.13333
    num_agent_steps_sampled: 895000
    num_agent_steps_trained: 895000
    num_steps_sampled: 895000
    num_steps_trained: 895000
  iterations_since_restore: 895

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,895,18616.9,895000,-1.9985,-1.72,-2.05,199.85


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 896000
  custom_metrics: {}
  date: 2021-10-24_01-11-26
  done: false
  episode_len_mean: 200.09
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.000900000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1482
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 668.5468041112154
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03183359156052271
          total_loss: 195085605.33333334
          vf_explained_var: -1.8543667934523e-08
          vf_loss: 195085605.33333334
    num_agent_steps_sampled: 896000
    num_agent_steps_trained: 896000
    num_steps_sampled: 896000
    num_steps_trained: 896000
  iterations_since_restore: 896
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,896,18676.5,896000,-2.0009,-1.72,-2.05,200.09


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 897000
  custom_metrics: {}
  date: 2021-10-24_01-12-26
  done: false
  episode_len_mean: 200.03
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.000300000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1487
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 334.2734020556077
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03183341771364212
          total_loss: 193392232.53333333
          vf_explained_var: 1.059638137235197e-07
          vf_loss: 193392232.17777777
    num_agent_steps_sampled: 897000
    num_agent_steps_trained: 897000
    num_steps_sampled: 897000
    num_steps_trained: 897000
  iterations_since_restore: 897
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,897,18736.2,897000,-2.0003,-1.72,-2.05,200.03


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 898000
  custom_metrics: {}
  date: 2021-10-24_01-13-25
  done: false
  episode_len_mean: 200.03
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.000300000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1492
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 167.13670102780384
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03183348476886749
          total_loss: 192676262.04444444
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 192676262.04444444
    num_agent_steps_sampled: 898000
    num_agent_steps_trained: 898000
    num_steps_sampled: 898000
    num_steps_trained: 898000
  iterations_since_restore: 898
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,898,18795.2,898000,-2.0003,-1.72,-2.05,200.03


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 899000
  custom_metrics: {}
  date: 2021-10-24_01-14-19
  done: false
  episode_len_mean: 200.03
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -2.000300000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1497
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 83.56835051390192
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03183338791131973
          total_loss: 192037335.1111111
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 192037335.1111111
    num_agent_steps_sampled: 899000
    num_agent_steps_trained: 899000
    num_steps_sampled: 899000
    num_steps_trained: 899000
  iterations_since_restore: 899
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,899,18849.3,899000,-2.0003,-1.72,-2.05,200.03




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 900000
  custom_metrics: {}
  date: 2021-10-24_01-15-37
  done: false
  episode_len_mean: 199.81
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9981000000000007
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1502
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 41.78417525695096
          cur_lr: 5.000000000000001e-05
          entropy: 0.011872601384917894
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.08426096787055333
          total_loss: .inf
          vf_explained_var: -0.6666666865348816
          vf_loss: 1968979878.7555556
    num_agent_steps_sampled: 900000
    num_agent_steps_trained: 900000
    num_steps_sampled: 900000
    num_steps_trained: 900000
  iterations_since_restore: 900
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,900,18927.2,900000,-1.9981,-1.72,-2.05,199.81




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 901000
  custom_metrics: {}
  date: 2021-10-24_01-16-32
  done: false
  episode_len_mean: 199.9
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.999000000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1507
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 62.67626288542643
          cur_lr: 5.000000000000001e-05
          entropy: 0.06204876104990641
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.029136187086502712
          total_loss: .inf
          vf_explained_var: -0.6665903925895691
          vf_loss: 971299505.5723268
    num_agent_steps_sampled: 901000
    num_agent_steps_trained: 901000
    num_steps_sampled: 901000
    num_steps_trained: 901000
  iterations_since_restore: 901
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,901,18982.1,901000,-1.999,-1.72,-2.05,199.9




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 902000
  custom_metrics: {}
  date: 2021-10-24_01-17-31
  done: false
  episode_len_mean: 200.28
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.002800000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1512
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 94.01439432813964
          cur_lr: 5.000000000000001e-05
          entropy: 0.1795798482373357
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.2656558734261327
          total_loss: .inf
          vf_explained_var: 0.355741411447525
          vf_loss: 1724552507.06875
    num_agent_steps_sampled: 902000
    num_agent_steps_trained: 902000
    num_steps_sampled: 902000
    num_steps_trained: 902000
  iterations_since_restore: 902
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,902,19040.9,902000,-2.0028,-1.73,-2.05,200.28




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 903000
  custom_metrics: {}
  date: 2021-10-24_01-18-31
  done: false
  episode_len_mean: 200.25
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0025000000000013
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1517
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 141.02159149220947
          cur_lr: 5.000000000000001e-05
          entropy: 0.059211788119541274
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03912810608744621
          total_loss: .inf
          vf_explained_var: -0.2317483127117157
          vf_loss: 943042195298.1333
    num_agent_steps_sampled: 903000
    num_agent_steps_trained: 903000
    num_steps_sampled: 903000
    num_steps_trained: 903000
  iterations_since_restore: 903


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,903,19101.2,903000,-2.0025,-1.73,-2.05,200.25




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 904000
  custom_metrics: {}
  date: 2021-10-24_01-19-32
  done: false
  episode_len_mean: 200.25
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0025000000000004
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1522
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 211.53238723831421
          cur_lr: 5.000000000000001e-05
          entropy: 0.044410087251000936
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.02996061493953069
          total_loss: .inf
          vf_explained_var: -0.7996014356613159
          vf_loss: 1398063500.87489
    num_agent_steps_sampled: 904000
    num_agent_steps_trained: 904000
    num_steps_sampled: 904000
    num_steps_trained: 904000
  iterations_since_restore: 904
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,904,19161.6,904000,-2.0025,-1.73,-2.05,200.25




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 905000
  custom_metrics: {}
  date: 2021-10-24_01-20-21
  done: false
  episode_len_mean: 200.23
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.002300000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1527
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 317.2985808574713
          cur_lr: 5.000000000000001e-05
          entropy: 0.067527593837844
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.1927184338370959
          total_loss: .inf
          vf_explained_var: -0.9500605463981628
          vf_loss: 168354932.2222222
    num_agent_steps_sampled: 905000
    num_agent_steps_trained: 905000
    num_steps_sampled: 905000
    num_steps_trained: 905000
  iterations_since_restore: 905
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,905,19211.4,905000,-2.0023,-1.73,-2.05,200.23




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 906000
  custom_metrics: {}
  date: 2021-10-24_01-21-32
  done: false
  episode_len_mean: 199.82
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9982000000000009
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1532
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 475.947871286207
          cur_lr: 5.000000000000001e-05
          entropy: 0.05803971174690459
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.03520313832494948
          total_loss: .inf
          vf_explained_var: 0.13453201949596405
          vf_loss: 1054731108.3555555
    num_agent_steps_sampled: 906000
    num_agent_steps_trained: 906000
    num_steps_sampled: 906000
    num_steps_trained: 906000
  iterations_since_restore: 906
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,906,19282.3,906000,-1.9982,-1.73,-2.05,199.82




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 907000
  custom_metrics: {}
  date: 2021-10-24_01-22-22
  done: false
  episode_len_mean: 199.73
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9973000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1537
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 713.9218069293103
          cur_lr: 5.000000000000001e-05
          entropy: 0.03576236000388033
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.05019556250837114
          total_loss: .inf
          vf_explained_var: -0.6669235229492188
          vf_loss: 1076066530.6055555
    num_agent_steps_sampled: 907000
    num_agent_steps_trained: 907000
    num_steps_sampled: 907000
    num_steps_trained: 907000
  iterations_since_restore: 907
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,907,19331.9,907000,-1.9973,-1.73,-2.04,199.73




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 908000
  custom_metrics: {}
  date: 2021-10-24_01-23-12
  done: false
  episode_len_mean: 199.9
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9990000000000006
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1542
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1070.8827103939657
          cur_lr: 5.000000000000001e-05
          entropy: 0.05311381823072831
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.09527549867828687
          total_loss: .inf
          vf_explained_var: -0.3473755419254303
          vf_loss: 66025103.32152778
    num_agent_steps_sampled: 908000
    num_agent_steps_trained: 908000
    num_steps_sampled: 908000
    num_steps_trained: 908000
  iterations_since_restore: 908
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,908,19382.4,908000,-1.999,-1.73,-2.04,199.9


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 909000
  custom_metrics: {}
  date: 2021-10-24_01-24-04
  done: false
  episode_len_mean: 199.94
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9994000000000014
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1547
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1606.3240655909487
          cur_lr: 5.000000000000001e-05
          entropy: 0.06191729307174683
          entropy_coeff: 0.009999999999999998
          kl: 9.052853935524277e-06
          policy_loss: -0.04336694470710224
          total_loss: 29963808233.244446
          vf_explained_var: 0.42959752678871155
          vf_loss: 29963808233.244446
    num_agent_steps_sampled: 909000
    num_agent_steps_trained: 909000
    num_steps_sampled: 909000
    num_steps_trained: 909000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,909,19433.9,909000,-1.9994,-1.73,-2.04,199.94




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 910000
  custom_metrics: {}
  date: 2021-10-24_01-24-53
  done: false
  episode_len_mean: 199.98
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9998000000000011
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1552
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 803.1620327954744
          cur_lr: 5.000000000000001e-05
          entropy: 0.025050959653324552
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.02957333789931403
          total_loss: .inf
          vf_explained_var: -0.6342020630836487
          vf_loss: 2405149012.954691
    num_agent_steps_sampled: 910000
    num_agent_steps_trained: 910000
    num_steps_sampled: 910000
    num_steps_trained: 910000
  iterations_since_restore: 910
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,910,19482.6,910000,-1.9998,-1.73,-2.04,199.98




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 911000
  custom_metrics: {}
  date: 2021-10-24_01-25-43
  done: false
  episode_len_mean: 199.99
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9999000000000011
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1557
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1204.7430491932118
          cur_lr: 5.000000000000001e-05
          entropy: 0.016904260673456722
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.02812869283888075
          total_loss: .inf
          vf_explained_var: -0.08591777831315994
          vf_loss: 1217689264.9777777
    num_agent_steps_sampled: 911000
    num_agent_steps_trained: 911000
    num_steps_sampled: 911000
    num_steps_trained: 911000
  iterations_since_restore: 911

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,911,19532.8,911000,-1.9999,-1.73,-2.04,199.99




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 912000
  custom_metrics: {}
  date: 2021-10-24_01-26-55
  done: false
  episode_len_mean: 199.78
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.997800000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1562
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1807.1145737898173
          cur_lr: 5.000000000000001e-05
          entropy: 0.029437204584893253
          entropy_coeff: 0.009999999999999998
          kl: 1.7114067783315554e-05
          policy_loss: -0.09191757688919704
          total_loss: 2775328411.022222
          vf_explained_var: 0.5343979597091675
          vf_loss: 2775328411.022222
    num_agent_steps_sampled: 912000
    num_agent_steps_trained: 912000
    num_steps_sampled: 912000
    num_steps_trained: 912000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,912,19604.9,912000,-1.9978,-1.73,-2.04,199.78


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 913000
  custom_metrics: {}
  date: 2021-10-24_01-27-49
  done: false
  episode_len_mean: 199.84
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9984000000000015
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1567
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 903.5572868949087
          cur_lr: 5.000000000000001e-05
          entropy: 0.020681514901419482
          entropy_coeff: 0.009999999999999998
          kl: -5.6055047097874285e-09
          policy_loss: -0.020871072241829502
          total_loss: 82173743315.2
          vf_explained_var: 0.05615604668855667
          vf_loss: 82173743315.2
    num_agent_steps_sampled: 913000
    num_agent_steps_trained: 913000
    num_steps_sampled: 913000
    num_steps_trained: 913000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,913,19659.2,913000,-1.9984,-1.73,-2.04,199.84


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 914000
  custom_metrics: {}
  date: 2021-10-24_01-28-44
  done: false
  episode_len_mean: 200.18
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.001800000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1572
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 451.77864344745433
          cur_lr: 5.000000000000001e-05
          entropy: 0.04536360430841645
          entropy_coeff: 0.009999999999999998
          kl: 5.712054305847166e-08
          policy_loss: -0.1393725131948789
          total_loss: 1227861718.0444446
          vf_explained_var: -0.8324203491210938
          vf_loss: 1227861718.0444446
    num_agent_steps_sampled: 914000
    num_agent_steps_trained: 914000
    num_steps_sampled: 914000
    num_steps_trained: 914000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,914,19714.3,914000,-2.0018,-1.73,-2.04,200.18




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 915000
  custom_metrics: {}
  date: 2021-10-24_01-29-37
  done: false
  episode_len_mean: 200.22
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.002200000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1577
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 225.88932172372716
          cur_lr: 5.000000000000001e-05
          entropy: 0.02738462657564216
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0024244725704193116
          total_loss: .inf
          vf_explained_var: 0.4162384569644928
          vf_loss: 1397316295.8222222
    num_agent_steps_sampled: 915000
    num_agent_steps_trained: 915000
    num_steps_sampled: 915000
    num_steps_trained: 915000
  iterations_since_restore: 915


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,915,19766.7,915000,-2.0022,-1.73,-2.04,200.22




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 916000
  custom_metrics: {}
  date: 2021-10-24_01-30-36
  done: false
  episode_len_mean: 200.33
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.003300000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1582
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 338.83398258559066
          cur_lr: 5.000000000000001e-05
          entropy: 0.06819352590375477
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.1021089981827471
          total_loss: .inf
          vf_explained_var: -0.7721427083015442
          vf_loss: 3120064089.5555553
    num_agent_steps_sampled: 916000
    num_agent_steps_trained: 916000
    num_steps_sampled: 916000
    num_steps_trained: 916000
  iterations_since_restore: 916
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,916,19825.8,916000,-2.0033,-1.73,-2.04,200.33




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 917000
  custom_metrics: {}
  date: 2021-10-24_01-31-35
  done: false
  episode_len_mean: 200.37
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0037000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1587
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 508.25097387838616
          cur_lr: 5.000000000000001e-05
          entropy: 0.06078221302272545
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.09994802872339885
          total_loss: .inf
          vf_explained_var: -0.2540875971317291
          vf_loss: 684700755.5597222
    num_agent_steps_sampled: 917000
    num_agent_steps_trained: 917000
    num_steps_sampled: 917000
    num_steps_trained: 917000
  iterations_since_restore: 917
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,917,19884.4,917000,-2.0037,-1.73,-2.04,200.37




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 918000
  custom_metrics: {}
  date: 2021-10-24_01-32-41
  done: false
  episode_len_mean: 200.16
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0016000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1592
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 762.3764608175791
          cur_lr: 5.000000000000001e-05
          entropy: 0.03536697259793679
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03880040993293126
          total_loss: .inf
          vf_explained_var: -0.5879099369049072
          vf_loss: 1315286982.355677
    num_agent_steps_sampled: 918000
    num_agent_steps_trained: 918000
    num_steps_sampled: 918000
    num_steps_trained: 918000
  iterations_since_restore: 918
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,918,19950.3,918000,-2.0016,-1.73,-2.04,200.16




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 919000
  custom_metrics: {}
  date: 2021-10-24_01-33-39
  done: false
  episode_len_mean: 200.22
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0022000000000006
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1597
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1143.564691226369
          cur_lr: 5.000000000000001e-05
          entropy: 0.054156768663475914
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.07303592157032755
          total_loss: .inf
          vf_explained_var: -0.666792094707489
          vf_loss: 204233884.04444444
    num_agent_steps_sampled: 919000
    num_agent_steps_trained: 919000
    num_steps_sampled: 919000
    num_steps_trained: 919000
  iterations_since_restore: 919
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,919,20008.7,919000,-2.0022,-1.73,-2.04,200.22




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 920000
  custom_metrics: {}
  date: 2021-10-24_01-34-33
  done: false
  episode_len_mean: 200.51
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.005100000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1602
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1715.3470368395533
          cur_lr: 5.000000000000001e-05
          entropy: 0.061140690412786275
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.006083073798153135
          total_loss: .inf
          vf_explained_var: -0.6125234961509705
          vf_loss: 6468518920.661005
    num_agent_steps_sampled: 920000
    num_agent_steps_trained: 920000
    num_steps_sampled: 920000
    num_steps_trained: 920000
  iterations_since_restore: 920


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,920,20062.5,920000,-2.0051,-1.73,-2.04,200.51




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 921000
  custom_metrics: {}
  date: 2021-10-24_01-35-25
  done: false
  episode_len_mean: 200.52
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.005200000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1607
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2573.0205552593293
          cur_lr: 5.000000000000001e-05
          entropy: 0.05402828440484073
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.04382370693816079
          total_loss: .inf
          vf_explained_var: -0.052028633654117584
          vf_loss: 1277911674828.9644
    num_agent_steps_sampled: 921000
    num_agent_steps_trained: 921000
    num_steps_sampled: 921000
    num_steps_trained: 921000
  iterations_since_restore: 921

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,921,20114.8,921000,-2.0052,-1.73,-2.04,200.52




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 922000
  custom_metrics: {}
  date: 2021-10-24_01-36-19
  done: false
  episode_len_mean: 200.53
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.005300000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1612
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3859.5308328889937
          cur_lr: 5.000000000000001e-05
          entropy: 0.040094449329707356
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.07522451811366611
          total_loss: .inf
          vf_explained_var: -0.3548021614551544
          vf_loss: 143711511414.81308
    num_agent_steps_sampled: 922000
    num_agent_steps_trained: 922000
    num_steps_sampled: 922000
    num_steps_trained: 922000
  iterations_since_restore: 922


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,922,20168.6,922000,-2.0053,-1.73,-2.04,200.53




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 923000
  custom_metrics: {}
  date: 2021-10-24_01-37-18
  done: false
  episode_len_mean: 200.51
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.005100000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1617
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5789.296249333492
          cur_lr: 5.000000000000001e-05
          entropy: 0.06642592487235864
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0019321203076591094
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 42014339986.48889
    num_agent_steps_sampled: 923000
    num_agent_steps_trained: 923000
    num_steps_sampled: 923000
    num_steps_trained: 923000
  iterations_since_restore: 923
  node_ip: 172.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,923,20227.7,923000,-2.0051,-1.73,-2.04,200.51




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 924000
  custom_metrics: {}
  date: 2021-10-24_01-38-33
  done: false
  episode_len_mean: 200.23
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.002300000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1622
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8683.944374000235
          cur_lr: 5.000000000000001e-05
          entropy: 0.047692653267747825
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.01191687070661121
          total_loss: .inf
          vf_explained_var: -0.6574805378913879
          vf_loss: 4144948482.94423
    num_agent_steps_sampled: 924000
    num_agent_steps_trained: 924000
    num_steps_sampled: 924000
    num_steps_trained: 924000
  iterations_since_restore: 924
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,924,20302.6,924000,-2.0023,-1.73,-2.04,200.23




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 925000
  custom_metrics: {}
  date: 2021-10-24_01-39-29
  done: false
  episode_len_mean: 200.22
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.002200000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1627
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 13025.916561000355
          cur_lr: 5.000000000000001e-05
          entropy: 0.057647999251882236
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.29736206067932974
          total_loss: .inf
          vf_explained_var: -0.44770243763923645
          vf_loss: 347103976678.4
    num_agent_steps_sampled: 925000
    num_agent_steps_trained: 925000
    num_steps_sampled: 925000
    num_steps_trained: 925000
  iterations_since_restore: 925
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,925,20358.5,925000,-2.0022,-1.73,-2.04,200.22




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 926000
  custom_metrics: {}
  date: 2021-10-24_01-40-24
  done: false
  episode_len_mean: 200.56
  episode_media: {}
  episode_reward_max: -1.7400000000000013
  episode_reward_mean: -2.0056000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1632
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 19538.874841500536
          cur_lr: 5.000000000000001e-05
          entropy: 0.05133127628101243
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0016525351390656497
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 24401245826.133335
    num_agent_steps_sampled: 926000
    num_agent_steps_trained: 926000
    num_steps_sampled: 926000
    num_steps_trained: 926000
  iterations_since_restore: 926
  node_ip: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,926,20413.6,926000,-2.0056,-1.74,-2.04,200.56




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 927000
  custom_metrics: {}
  date: 2021-10-24_01-41-17
  done: false
  episode_len_mean: 200.55
  episode_media: {}
  episode_reward_max: -1.7400000000000013
  episode_reward_mean: -2.0055000000000005
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1637
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 29308.312262250798
          cur_lr: 5.000000000000001e-05
          entropy: 0.03178696815641531
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.06481298853953679
          total_loss: .inf
          vf_explained_var: -0.3765262961387634
          vf_loss: 1417152870.8444445
    num_agent_steps_sampled: 927000
    num_agent_steps_trained: 927000
    num_steps_sampled: 927000
    num_steps_trained: 927000
  iterations_since_restore: 927
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,927,20466.1,927000,-2.0055,-1.74,-2.04,200.55




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 928000
  custom_metrics: {}
  date: 2021-10-24_01-42-15
  done: false
  episode_len_mean: 200.58
  episode_media: {}
  episode_reward_max: -1.7400000000000013
  episode_reward_mean: -2.005800000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1642
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 43962.468393376206
          cur_lr: 5.000000000000001e-05
          entropy: 0.06594096347689629
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.039346381463110446
          total_loss: .inf
          vf_explained_var: -0.3472285270690918
          vf_loss: 68543268076.8
    num_agent_steps_sampled: 928000
    num_agent_steps_trained: 928000
    num_steps_sampled: 928000
    num_steps_trained: 928000
  iterations_since_restore: 928
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,928,20524.6,928000,-2.0058,-1.74,-2.04,200.58


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 929000
  custom_metrics: {}
  date: 2021-10-24_01-43-13
  done: false
  episode_len_mean: 200.59
  episode_media: {}
  episode_reward_max: -1.7400000000000013
  episode_reward_mean: -2.005900000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1647
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 65943.70259006428
          cur_lr: 5.000000000000001e-05
          entropy: 0.04231522379236089
          entropy_coeff: 0.009999999999999998
          kl: 5.56818778580443e-07
          policy_loss: -0.031163302560647328
          total_loss: 64372153810.48889
          vf_explained_var: 0.35494616627693176
          vf_loss: 64372153810.48889
    num_agent_steps_sampled: 929000
    num_agent_steps_trained: 929000
    num_steps_sampled: 929000
    num_steps_trained: 929000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,929,20582.1,929000,-2.0059,-1.74,-2.04,200.59




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 930000
  custom_metrics: {}
  date: 2021-10-24_01-44-21
  done: false
  episode_len_mean: 200.32
  episode_media: {}
  episode_reward_max: -1.7400000000000013
  episode_reward_mean: -2.003200000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1652
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 32971.85129503214
          cur_lr: 5.000000000000001e-05
          entropy: 0.06640308226148288
          entropy_coeff: 0.009999999999999998
          kl: 0.0034115301836353733
          policy_loss: -0.005867846641275618
          total_loss: 478865157364.6222
          vf_explained_var: -0.9905139207839966
          vf_loss: 478865156636.44446
    num_agent_steps_sampled: 930000
    num_agent_steps_trained: 930000
    num_steps_sampled: 930000
    num_steps_trained: 930000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,930,20650.2,930000,-2.0032,-1.74,-2.04,200.32


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 931000
  custom_metrics: {}
  date: 2021-10-24_01-45-17
  done: false
  episode_len_mean: 200.31
  episode_media: {}
  episode_reward_max: -1.7400000000000013
  episode_reward_mean: -2.003100000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1657
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 16485.92564751607
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.0320654089252154
          total_loss: 836821108.6222222
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 836821108.6222222
    num_agent_steps_sampled: 931000
    num_agent_steps_trained: 931000
    num_steps_sampled: 931000
    num_steps_trained: 931000
  iterations_since_restore: 931
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,931,20706.4,931000,-2.0031,-1.74,-2.04,200.31


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 932000
  custom_metrics: {}
  date: 2021-10-24_01-46-07
  done: false
  episode_len_mean: 200.52
  episode_media: {}
  episode_reward_max: -1.7400000000000013
  episode_reward_mean: -2.0052000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1662
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8242.962823758035
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.032065274814764656
          total_loss: 360579178.6666667
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 360579178.6666667
    num_agent_steps_sampled: 932000
    num_agent_steps_trained: 932000
    num_steps_sampled: 932000
    num_steps_trained: 932000
  iterations_since_restore: 932
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,932,20756.3,932000,-2.0052,-1.74,-2.04,200.52


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 933000
  custom_metrics: {}
  date: 2021-10-24_01-47-00
  done: false
  episode_len_mean: 200.46
  episode_media: {}
  episode_reward_max: -1.7400000000000013
  episode_reward_mean: -2.004600000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1667
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4121.481411879017
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03206536918878555
          total_loss: 359546237.15555555
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 359546237.15555555
    num_agent_steps_sampled: 933000
    num_agent_steps_trained: 933000
    num_steps_sampled: 933000
    num_steps_trained: 933000
  iterations_since_restore: 933
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,933,20809.6,933000,-2.0046,-1.74,-2.04,200.46


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 934000
  custom_metrics: {}
  date: 2021-10-24_01-47-48
  done: false
  episode_len_mean: 200.39
  episode_media: {}
  episode_reward_max: -1.7400000000000013
  episode_reward_mean: -2.003900000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1672
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2060.7407059395086
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03206528971592585
          total_loss: 358316532.26666665
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 358316532.26666665
    num_agent_steps_sampled: 934000
    num_agent_steps_trained: 934000
    num_steps_sampled: 934000
    num_steps_trained: 934000
  iterations_since_restore: 934
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,934,20857.4,934000,-2.0039,-1.74,-2.04,200.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 935000
  custom_metrics: {}
  date: 2021-10-24_01-48-39
  done: false
  episode_len_mean: 200.35
  episode_media: {}
  episode_reward_max: -1.7400000000000013
  episode_reward_mean: -2.0035000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1677
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1030.3703529697543
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03206536422173182
          total_loss: 357111242.6666667
          vf_explained_var: -1.1920928955078125e-07
          vf_loss: 357111242.6666667
    num_agent_steps_sampled: 935000
    num_agent_steps_trained: 935000
    num_steps_sampled: 935000
    num_steps_trained: 935000
  iterations_since_restore: 935
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,935,20908.5,935000,-2.0035,-1.74,-2.04,200.35




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 936000
  custom_metrics: {}
  date: 2021-10-24_01-49-48
  done: false
  episode_len_mean: 199.97
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9997000000000005
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1682
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 515.1851764848772
          cur_lr: 5.000000000000001e-05
          entropy: 0.0006769283312476344
          entropy_coeff: 0.009999999999999998
          kl: 5.565046416854279e-05
          policy_loss: -0.010453592985868454
          total_loss: 586309592691.9111
          vf_explained_var: -0.3333333730697632
          vf_loss: 586309592691.9111
    num_agent_steps_sampled: 936000
    num_agent_steps_trained: 936000
    num_steps_sampled: 936000
    num_steps_trained: 936000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,936,20977.5,936000,-1.9997,-1.73,-2.04,199.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 937000
  custom_metrics: {}
  date: 2021-10-24_01-50-38
  done: false
  episode_len_mean: 199.93
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9993000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1687
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 257.5925882424386
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03195765862862269
          total_loss: 480410730.3111111
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 480410730.3111111
    num_agent_steps_sampled: 937000
    num_agent_steps_trained: 937000
    num_steps_sampled: 937000
    num_steps_trained: 937000
  iterations_since_restore: 937
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,937,21026.7,937000,-1.9993,-1.73,-2.04,199.93


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 938000
  custom_metrics: {}
  date: 2021-10-24_01-51-34
  done: false
  episode_len_mean: 200.14
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0014000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1692
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 128.7962941212193
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03195775548617045
          total_loss: 351255688.17777777
          vf_explained_var: 0.0
          vf_loss: 351255688.17777777
    num_agent_steps_sampled: 938000
    num_agent_steps_trained: 938000
    num_steps_sampled: 938000
    num_steps_trained: 938000
  iterations_since_restore: 938
  node_ip: 172.17.0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,938,21082.8,938000,-2.0014,-1.73,-2.04,200.14


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 939000
  custom_metrics: {}
  date: 2021-10-24_01-52-28
  done: false
  episode_len_mean: 200.08
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.000800000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1697
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 64.39814706060965
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031957688430945076
          total_loss: 350059570.1333333
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 350059570.1333333
    num_agent_steps_sampled: 939000
    num_agent_steps_trained: 939000
    num_steps_sampled: 939000
    num_steps_trained: 939000
  iterations_since_restore: 939
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,939,21137.5,939000,-2.0008,-1.73,-2.04,200.08


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 940000
  custom_metrics: {}
  date: 2021-10-24_01-53-20
  done: false
  episode_len_mean: 200.01
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0001000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1702
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 32.19907353030482
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03195784240961075
          total_loss: 348420154.3111111
          vf_explained_var: -1.0596381549987655e-08
          vf_loss: 348420153.95555556
    num_agent_steps_sampled: 940000
    num_agent_steps_trained: 940000
    num_steps_sampled: 940000
    num_steps_trained: 940000
  iterations_since_restore: 940
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,940,21189.2,940000,-2.0001,-1.73,-2.04,200.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 941000
  custom_metrics: {}
  date: 2021-10-24_01-54-11
  done: false
  episode_len_mean: 199.91
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9991000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1707
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 16.09953676515241
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03195784613490105
          total_loss: 346800937.95555556
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 346800937.95555556
    num_agent_steps_sampled: 941000
    num_agent_steps_trained: 941000
    num_steps_sampled: 941000
    num_steps_trained: 941000
  iterations_since_restore: 941


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,941,21239.7,941000,-1.9991,-1.73,-2.04,199.91




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 942000
  custom_metrics: {}
  date: 2021-10-24_01-55-21
  done: false
  episode_len_mean: 199.53
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.995300000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1712
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.049768382576206
          cur_lr: 5.000000000000001e-05
          entropy: 0.00029010600733777713
          entropy_coeff: 0.009999999999999998
          kl: 1.3912111171521246e-05
          policy_loss: -0.030468960603078205
          total_loss: 907342336.7111111
          vf_explained_var: -0.227074533700943
          vf_loss: 907342336.7111111
    num_agent_steps_sampled: 942000
    num_agent_steps_trained: 942000
    num_steps_sampled: 942000
    num_steps_trained: 942000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,942,21309.7,942000,-1.9953,-1.73,-2.04,199.53


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 943000
  custom_metrics: {}
  date: 2021-10-24_01-56-11
  done: false
  episode_len_mean: 199.44
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.994400000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1717
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.024884191288103
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031855364640553795
          total_loss: 353039669.3333333
          vf_explained_var: 6.092919591083046e-08
          vf_loss: 353039669.3333333
    num_agent_steps_sampled: 943000
    num_agent_steps_trained: 943000
    num_steps_sampled: 943000
    num_steps_trained: 943000
  iterations_since_restore: 943
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,943,21360.2,943000,-1.9944,-1.73,-2.04,199.44


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 944000
  custom_metrics: {}
  date: 2021-10-24_01-57-05
  done: false
  episode_len_mean: 199.61
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9961000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1722
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0124420956440514
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03185531497001648
          total_loss: 342381815.82222223
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 342381815.82222223
    num_agent_steps_sampled: 944000
    num_agent_steps_trained: 944000
    num_steps_sampled: 944000
    num_steps_trained: 944000
  iterations_since_restore: 944


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,944,21414.3,944000,-1.9961,-1.73,-2.04,199.61


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 945000
  custom_metrics: {}
  date: 2021-10-24_01-57-54
  done: false
  episode_len_mean: 199.53
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.995300000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1727
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0062210478220257
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03185528516769409
          total_loss: 341000235.0222222
          vf_explained_var: 1.7881394143159923e-08
          vf_loss: 341000235.0222222
    num_agent_steps_sampled: 945000
    num_agent_steps_trained: 945000
    num_steps_sampled: 945000
    num_steps_trained: 945000
  iterations_since_restore: 945
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,945,21462.6,945000,-1.9953,-1.73,-2.04,199.53


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 946000
  custom_metrics: {}
  date: 2021-10-24_01-58-44
  done: false
  episode_len_mean: 199.44
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.994400000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 1732
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.5031105239110129
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03185531745354334
          total_loss: 339612617.6
          vf_explained_var: -1.1920928955078125e-07
          vf_loss: 339612617.6
    num_agent_steps_sampled: 946000
    num_agent_steps_trained: 946000
    num_steps_sampled: 946000
    num_steps_trained: 946000
  iterations_since_restore: 946
  node_ip: 172

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,946,21513.3,946000,-1.9944,-1.73,-2.03,199.44


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 947000
  custom_metrics: {}
  date: 2021-10-24_01-59-38
  done: false
  episode_len_mean: 199.4
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9940000000000009
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 1737
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.2515552619555064
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03185531993707021
          total_loss: 338212502.04444444
          vf_explained_var: -6.225373994084293e-08
          vf_loss: 338212502.04444444
    num_agent_steps_sampled: 947000
    num_agent_steps_trained: 947000
    num_steps_sampled: 947000
    num_steps_trained: 947000
  iterations_since_restore: 947
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,947,21567.1,947000,-1.994,-1.73,-2.03,199.4




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 948000
  custom_metrics: {}
  date: 2021-10-24_02-00-53
  done: false
  episode_len_mean: 199.01
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9901000000000009
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 1742
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.1257776309777532
          cur_lr: 5.000000000000001e-05
          entropy: 0.0002900853048130456
          entropy_coeff: 0.009999999999999998
          kl: 4.173154011368752e-05
          policy_loss: -0.015342349807421366
          total_loss: 19342507537.77778
          vf_explained_var: -0.2771817445755005
          vf_loss: 19342507537.77778
    num_agent_steps_sampled: 948000
    num_agent_steps_trained: 948000
    num_steps_sampled: 948000
    num_steps_trained: 948000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,948,21641.4,948000,-1.9901,-1.7,-2.03,199.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 949000
  custom_metrics: {}
  date: 2021-10-24_02-01-51
  done: false
  episode_len_mean: 198.93
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.989300000000001
  episode_reward_min: -2.010000000000001
  episodes_this_iter: 5
  episodes_total: 1747
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0628888154888766
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031311569114526115
          total_loss: 345951833.6
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 345951833.6
    num_agent_steps_sampled: 949000
    num_agent_steps_trained: 949000
    num_steps_sampled: 949000
    num_steps_trained: 949000
  iterations_since_restore: 949
  node_ip: 172.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,949,21699.9,949000,-1.9893,-1.7,-2.01,198.93


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 950000
  custom_metrics: {}
  date: 2021-10-24_02-02-42
  done: false
  episode_len_mean: 199.16
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.991600000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 1752
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0314444077444383
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031311631202697754
          total_loss: 329042721.06666666
          vf_explained_var: 2.8477774804969158e-08
          vf_loss: 329042721.06666666
    num_agent_steps_sampled: 950000
    num_agent_steps_trained: 950000
    num_steps_sampled: 950000
    num_steps_trained: 950000
  iterations_since_restore: 950


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,950,21750.9,950000,-1.9916,-1.7,-2,199.16


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 951000
  custom_metrics: {}
  date: 2021-10-24_02-03-37
  done: false
  episode_len_mean: 199.16
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.991600000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 1757
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.01572220387221915
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03131161878506342
          total_loss: 327791866.6666667
          vf_explained_var: 2.6490953430879927e-08
          vf_loss: 327791866.6666667
    num_agent_steps_sampled: 951000
    num_agent_steps_trained: 951000
    num_steps_sampled: 951000
    num_steps_trained: 951000
  iterations_since_restore: 951
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,951,21805.7,951000,-1.9916,-1.7,-2,199.16


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 952000
  custom_metrics: {}
  date: 2021-10-24_02-04-32
  done: false
  episode_len_mean: 199.16
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.991600000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 1762
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.007861101936109576
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03131150330106417
          total_loss: 326263186.84444445
          vf_explained_var: 1.2583202924076886e-08
          vf_loss: 326263186.84444445
    num_agent_steps_sampled: 952000
    num_agent_steps_trained: 952000
    num_steps_sampled: 952000
    num_steps_trained: 952000
  iterations_since_restore: 952

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,952,21861.1,952000,-1.9916,-1.7,-2,199.16


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 953000
  custom_metrics: {}
  date: 2021-10-24_02-05-23
  done: false
  episode_len_mean: 199.16
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.991600000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 1767
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003930550968054788
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03131159394979477
          total_loss: 324724702.93333334
          vf_explained_var: -7.020102543719986e-08
          vf_loss: 324724702.5777778
    num_agent_steps_sampled: 953000
    num_agent_steps_trained: 953000
    num_steps_sampled: 953000
    num_steps_trained: 953000
  iterations_since_restore: 953


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,953,21911.3,953000,-1.9916,-1.7,-2,199.16




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 954000
  custom_metrics: {}
  date: 2021-10-24_02-06-31
  done: false
  episode_len_mean: 198.91
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9891000000000005
  episode_reward_min: -2.010000000000001
  episodes_this_iter: 5
  episodes_total: 1772
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001965275484027394
          cur_lr: 5.000000000000001e-05
          entropy: 0.03268394147356351
          entropy_coeff: 0.009999999999999998
          kl: 1.391703229646583e-05
          policy_loss: -0.18312788440121544
          total_loss: 1425475003.021427
          vf_explained_var: -0.2836979627609253
          vf_loss: 1425475002.819613
    num_agent_steps_sampled: 954000
    num_agent_steps_trained: 954000
    num_steps_sampled: 954000
    num_steps_trained: 954000
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,954,21980,954000,-1.9891,-1.7,-2.01,198.91




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 955000
  custom_metrics: {}
  date: 2021-10-24_02-07-21
  done: false
  episode_len_mean: 199.0
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.990000000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1777
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.000982637742013697
          cur_lr: 5.000000000000001e-05
          entropy: 0.024491078717013202
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.06441643651988771
          total_loss: .inf
          vf_explained_var: -0.04333019629120827
          vf_loss: 167654959696.91275
    num_agent_steps_sampled: 955000
    num_agent_steps_trained: 955000
    num_steps_sampled: 955000
    num_steps_trained: 955000
  iterations_since_restore: 95

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,955,22030,955000,-1.99,-1.7,-2.04,199


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 956000
  custom_metrics: {}
  date: 2021-10-24_02-08-18
  done: false
  episode_len_mean: 199.34
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.993400000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1782
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0014739566130205458
          cur_lr: 5.000000000000001e-05
          entropy: 0.051541996209157837
          entropy_coeff: 0.009999999999999998
          kl: 0.0001808600830792965
          policy_loss: -0.08589898943901061
          total_loss: 26155408223800.89
          vf_explained_var: -0.010715708136558533
          vf_loss: 26155408223800.89
    num_agent_steps_sampled: 956000
    num_agent_steps_trained: 956000
    num_steps_sampled: 956000
    num_steps_trained: 95600

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,956,22086.3,956000,-1.9934,-1.7,-2.04,199.34




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 957000
  custom_metrics: {}
  date: 2021-10-24_02-09-11
  done: false
  episode_len_mean: 199.46
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9946000000000013
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1787
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0007369783065102729
          cur_lr: 5.000000000000001e-05
          entropy: 0.05482946261763573
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.024264921165174907
          total_loss: .inf
          vf_explained_var: -0.666927695274353
          vf_loss: 121576299133.83266
    num_agent_steps_sampled: 957000
    num_agent_steps_trained: 957000
    num_steps_sampled: 957000
    num_steps_trained: 957000
  iterations_since_restore: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,957,22139.6,957000,-1.9946,-1.7,-2.05,199.46




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 958000
  custom_metrics: {}
  date: 2021-10-24_02-10-04
  done: false
  episode_len_mean: 199.53
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9953000000000012
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1792
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001105467459765409
          cur_lr: 5.000000000000001e-05
          entropy: 0.0368430409166548
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.04160598665475845
          total_loss: .inf
          vf_explained_var: 0.0900673195719719
          vf_loss: 7328712829.511111
    num_agent_steps_sampled: 958000
    num_agent_steps_trained: 958000
    num_steps_sampled: 958000
    num_steps_trained: 958000
  iterations_since_restore: 958
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,958,22192.9,958000,-1.9953,-1.7,-2.05,199.53




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 959000
  custom_metrics: {}
  date: 2021-10-24_02-11-05
  done: false
  episode_len_mean: 199.6
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9960000000000009
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1797
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.001658201189648114
          cur_lr: 5.000000000000001e-05
          entropy: 0.03703614856219954
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0700045429997974
          total_loss: .inf
          vf_explained_var: -0.0279217716306448
          vf_loss: 2795481976593.067
    num_agent_steps_sampled: 959000
    num_agent_steps_trained: 959000
    num_steps_sampled: 959000
    num_steps_trained: 959000
  iterations_since_restore: 959
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,959,22253.2,959000,-1.996,-1.7,-2.05,199.6




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 960000
  custom_metrics: {}
  date: 2021-10-24_02-12-18
  done: false
  episode_len_mean: 199.38
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9938000000000013
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1802
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.002487301784472171
          cur_lr: 5.000000000000001e-05
          entropy: 0.029009928016199005
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.13756238718827565
          total_loss: .inf
          vf_explained_var: -0.6666666865348816
          vf_loss: 36949225281.422226
    num_agent_steps_sampled: 960000
    num_agent_steps_trained: 960000
    num_steps_sampled: 960000
    num_steps_trained: 960000
  iterations_since_restore: 96

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,960,22326.2,960000,-1.9938,-1.7,-2.05,199.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 961000
  custom_metrics: {}
  date: 2021-10-24_02-13-08
  done: false
  episode_len_mean: 199.38
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9938000000000011
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1807
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.003730952676708255
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03108985722064972
          total_loss: 448808950533.6889
          vf_explained_var: 2.6490953430879927e-08
          vf_loss: 448808950533.6889
    num_agent_steps_sampled: 961000
    num_agent_steps_trained: 961000
    num_steps_sampled: 961000
    num_steps_trained: 961000
  iterations_since_restore: 961


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,961,22376.4,961000,-1.9938,-1.7,-2.05,199.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 962000
  custom_metrics: {}
  date: 2021-10-24_02-14-07
  done: false
  episode_len_mean: 199.65
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9965000000000008
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1812
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0018654763383541275
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03108970324198405
          total_loss: 574613845.3333334
          vf_explained_var: 0.0
          vf_loss: 574613845.3333334
    num_agent_steps_sampled: 962000
    num_agent_steps_trained: 962000
    num_steps_sampled: 962000
    num_steps_trained: 962000
  iterations_since_restore: 962
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,962,22435.3,962000,-1.9965,-1.7,-2.05,199.65


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 963000
  custom_metrics: {}
  date: 2021-10-24_02-15-01
  done: false
  episode_len_mean: 199.65
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9965000000000008
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1817
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0009327381691770638
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031089715659618378
          total_loss: 564401040.0
          vf_explained_var: 0.0
          vf_loss: 564401040.0
    num_agent_steps_sampled: 963000
    num_agent_steps_trained: 963000
    num_steps_sampled: 963000
    num_steps_trained: 963000
  iterations_since_restore: 963
  node_ip: 172.17.0.2
  num_h

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,963,22489.2,963000,-1.9965,-1.7,-2.05,199.65


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 964000
  custom_metrics: {}
  date: 2021-10-24_02-15-49
  done: false
  episode_len_mean: 199.65
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9965000000000008
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1822
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0004663690845885319
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03108960638443629
          total_loss: 563558020.2666667
          vf_explained_var: -1.1920928955078125e-07
          vf_loss: 563558020.2666667
    num_agent_steps_sampled: 964000
    num_agent_steps_trained: 964000
    num_steps_sampled: 964000
    num_steps_trained: 964000
  iterations_since_restore: 96

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,964,22536.9,964000,-1.9965,-1.7,-2.05,199.65


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 965000
  custom_metrics: {}
  date: 2021-10-24_02-16-42
  done: false
  episode_len_mean: 199.65
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9965000000000008
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1827
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00023318454229426594
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031089576582113903
          total_loss: 562894572.4444444
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 562894572.4444444
    num_agent_steps_sampled: 965000
    num_agent_steps_trained: 965000
    num_steps_sampled: 965000
    num_steps_trained: 965000
  iterations_since_restore: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,965,22589.9,965000,-1.9965,-1.7,-2.05,199.65




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 966000
  custom_metrics: {}
  date: 2021-10-24_02-17-50
  done: false
  episode_len_mean: 199.4
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9940000000000009
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1832
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.00011659227114713297
          cur_lr: 5.000000000000001e-05
          entropy: 0.014988486179047161
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.029478850298457676
          total_loss: .inf
          vf_explained_var: -0.6277964115142822
          vf_loss: 4072093984.0
    num_agent_steps_sampled: 966000
    num_agent_steps_trained: 966000
    num_steps_sampled: 966000
    num_steps_trained: 966000
  iterations_since_restore: 966
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,966,22658.3,966000,-1.994,-1.7,-2.05,199.4


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 967000
  custom_metrics: {}
  date: 2021-10-24_02-18-39
  done: false
  episode_len_mean: 199.4
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9940000000000009
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1837
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.0001748884067206995
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.028502076864242554
          total_loss: 1789933580.8
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 1789933580.8
    num_agent_steps_sampled: 967000
    num_agent_steps_trained: 967000
    num_steps_sampled: 967000
    num_steps_trained: 967000
  iterations_since_restore: 967
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,967,22707.5,967000,-1.994,-1.7,-2.05,199.4


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 968000
  custom_metrics: {}
  date: 2021-10-24_02-19-31
  done: false
  episode_len_mean: 199.7
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.997000000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1842
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.744420336034974e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.028502099215984344
          total_loss: 529922069.3333333
          vf_explained_var: 0.0
          vf_loss: 529922069.3333333
    num_agent_steps_sampled: 968000
    num_agent_steps_trained: 968000
    num_steps_sampled: 968000
    num_steps_trained: 968000
  iterations_since_restore: 968
  node_ip: 172.17.0

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,968,22758.8,968000,-1.997,-1.73,-2.05,199.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 969000
  custom_metrics: {}
  date: 2021-10-24_02-20-24
  done: false
  episode_len_mean: 199.7
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.997000000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1847
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.372210168017487e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.028501843412717182
          total_loss: 530897055.2888889
          vf_explained_var: 1.4570024298166118e-08
          vf_loss: 530897055.2888889
    num_agent_steps_sampled: 969000
    num_agent_steps_trained: 969000
    num_steps_sampled: 969000
    num_steps_trained: 969000
  iterations_since_restore: 969


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,969,22812.1,969000,-1.997,-1.73,-2.05,199.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 970000
  custom_metrics: {}
  date: 2021-10-24_02-21-12
  done: false
  episode_len_mean: 199.7
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.997000000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1852
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1861050840087436e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.028501870731512707
          total_loss: 529749195.73333335
          vf_explained_var: 5.960464477539063e-08
          vf_loss: 529749195.73333335
    num_agent_steps_sampled: 970000
    num_agent_steps_trained: 970000
    num_steps_sampled: 970000
    num_steps_trained: 970000
  iterations_since_restore: 97

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,970,22860.7,970000,-1.997,-1.73,-2.05,199.7


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 971000
  custom_metrics: {}
  date: 2021-10-24_02-22-08
  done: false
  episode_len_mean: 199.7
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.997000000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1857
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0930525420043718e-05
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.02850193778673808
          total_loss: 528636990.2222222
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 528636990.2222222
    num_agent_steps_sampled: 971000
    num_agent_steps_trained: 971000
    num_steps_sampled: 971000
    num_steps_trained: 971000
  iterations_since_restore: 971
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,971,22915.8,971000,-1.997,-1.73,-2.05,199.7




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 972000
  custom_metrics: {}
  date: 2021-10-24_02-23-19
  done: false
  episode_len_mean: 199.43
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9943000000000008
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 6
  episodes_total: 1863
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.465262710021859e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.0002900994764382227
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.05729843179384867
          total_loss: .inf
          vf_explained_var: -0.3333333432674408
          vf_loss: 818676845.8666667
    num_agent_steps_sampled: 972000
    num_agent_steps_trained: 972000
    num_steps_sampled: 972000
    num_steps_trained: 972000
  iterations_since_restore: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,972,22986.8,972000,-1.9943,-1.73,-2.05,199.43


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 973000
  custom_metrics: {}
  date: 2021-10-24_02-24-08
  done: false
  episode_len_mean: 199.43
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9943000000000006
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1868
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.197894065032788e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.08041438708702724
          total_loss: 558454592.0
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 558454592.0
    num_agent_steps_sampled: 973000
    num_agent_steps_trained: 973000
    num_steps_sampled: 973000
    num_steps_trained: 973000
  iterations_since_restore: 973
  node_ip: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,973,23035.7,973000,-1.9943,-1.73,-2.05,199.43


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 974000
  custom_metrics: {}
  date: 2021-10-24_02-25-00
  done: false
  episode_len_mean: 199.64
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.996400000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1873
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.098947032516394e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.08041439205408096
          total_loss: 495801607.1111111
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 495801607.1111111
    num_agent_steps_sampled: 974000
    num_agent_steps_trained: 974000
    num_steps_sampled: 974000
    num_steps_trained: 974000
  iterations_since_restore: 974
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,974,23088.4,974000,-1.9964,-1.73,-2.05,199.64


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 975000
  custom_metrics: {}
  date: 2021-10-24_02-25-50
  done: false
  episode_len_mean: 199.58
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.995800000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1878
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.049473516258197e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.08041440695524216
          total_loss: 494056505.95555556
          vf_explained_var: 0.0
          vf_loss: 494056505.95555556
    num_agent_steps_sampled: 975000
    num_agent_steps_trained: 975000
    num_steps_sampled: 975000
    num_steps_trained: 975000
  iterations_since_restore: 975
  node_ip: 172.17.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,975,23138.1,975000,-1.9958,-1.73,-2.05,199.58


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 976000
  custom_metrics: {}
  date: 2021-10-24_02-26-37
  done: false
  episode_len_mean: 199.5
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9950000000000008
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 1883
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0247367581290985e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.08041434486707051
          total_loss: 492751297.4222222
          vf_explained_var: 6.225373994084293e-08
          vf_loss: 492751297.4222222
    num_agent_steps_sampled: 976000
    num_agent_steps_trained: 976000
    num_steps_sampled: 976000
    num_steps_trained: 976000
  iterations_since_restore: 976
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,976,23185.2,976000,-1.995,-1.73,-2.05,199.5


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 977000
  custom_metrics: {}
  date: 2021-10-24_02-27-26
  done: false
  episode_len_mean: 199.37
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9937000000000011
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 1888
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.123683790645492e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.080414483944575
          total_loss: 491384323.9111111
          vf_explained_var: 6.092919591083046e-08
          vf_loss: 491384323.9111111
    num_agent_steps_sampled: 977000
    num_agent_steps_trained: 977000
    num_steps_sampled: 977000
    num_steps_trained: 977000
  iterations_since_restore: 977
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,977,23233.7,977000,-1.9937,-1.73,-2.03,199.37




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 978000
  custom_metrics: {}
  date: 2021-10-24_02-28-33
  done: false
  episode_len_mean: 199.15
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9915000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1893
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.561841895322746e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.020114379852182337
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.11302788348661529
          total_loss: .inf
          vf_explained_var: -0.6666666865348816
          vf_loss: 14333637672.88889
    num_agent_steps_sampled: 978000
    num_agent_steps_trained: 978000
    num_steps_sampled: 978000
    num_steps_trained: 978000
  iterations_since_restore: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,978,23300.8,978000,-1.9915,-1.73,-2.04,199.15




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 979000
  custom_metrics: {}
  date: 2021-10-24_02-29-25
  done: false
  episode_len_mean: 199.1
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9910000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1898
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.84276284298412e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.008412908888163251
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.15940489371617636
          total_loss: .inf
          vf_explained_var: -0.46805623173713684
          vf_loss: 176048344274.4889
    num_agent_steps_sampled: 979000
    num_agent_steps_trained: 979000
    num_steps_sampled: 979000
    num_steps_trained: 979000
  iterations_since_restore: 979

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,979,23353.5,979000,-1.991,-1.73,-2.04,199.1




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 980000
  custom_metrics: {}
  date: 2021-10-24_02-30-21
  done: false
  episode_len_mean: 199.42
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9942000000000004
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1903
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.764144264476179e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.054151619970798495
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.006958185136318207
          total_loss: .inf
          vf_explained_var: 0.10648227483034134
          vf_loss: 370104896509.1556
    num_agent_steps_sampled: 980000
    num_agent_steps_trained: 980000
    num_steps_sampled: 980000
    num_steps_trained: 980000
  iterations_since_restore: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,980,23409.3,980000,-1.9942,-1.73,-2.04,199.42




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 981000
  custom_metrics: {}
  date: 2021-10-24_02-31-22
  done: false
  episode_len_mean: 199.52
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.995200000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 4
  episodes_total: 1907
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.646216396714272e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.03887146367018835
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.07005935943064591
          total_loss: .inf
          vf_explained_var: -0.094842329621315
          vf_loss: 56972234401.066666
    num_agent_steps_sampled: 981000
    num_agent_steps_trained: 981000
    num_steps_sampled: 981000
    num_steps_trained: 981000
  iterations_since_restore: 981


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,981,23470,981000,-1.9952,-1.73,-2.04,199.52




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 982000
  custom_metrics: {}
  date: 2021-10-24_02-32-15
  done: false
  episode_len_mean: 199.57
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9957000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1912
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2969324595071408e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.043508987708224194
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.0007399926738192638
          total_loss: .inf
          vf_explained_var: 0.19705899059772491
          vf_loss: 11000810974.933332
    num_agent_steps_sampled: 982000
    num_agent_steps_trained: 982000
    num_steps_sampled: 982000
    num_steps_trained: 982000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,982,23523,982000,-1.9957,-1.73,-2.04,199.57


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 983000
  custom_metrics: {}
  date: 2021-10-24_02-33-08
  done: false
  episode_len_mean: 199.65
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9965000000000006
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1917
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9453986892607115e-06
          cur_lr: 5.000000000000001e-05
          entropy: 0.05404501925739977
          entropy_coeff: 0.009999999999999998
          kl: -1.130172497562951e-08
          policy_loss: -0.18238968998193741
          total_loss: 1024124556047.5555
          vf_explained_var: 0.3355543315410614
          vf_loss: 1024124556047.6
    num_agent_steps_sampled: 983000
    num_agent_steps_trained: 983000
    num_steps_sampled: 983000
    num_steps_trained: 983000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,983,23576.4,983000,-1.9965,-1.73,-2.04,199.65




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 984000
  custom_metrics: {}
  date: 2021-10-24_02-34-26
  done: false
  episode_len_mean: 199.41
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.994100000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 6
  episodes_total: 1923
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.726993446303557e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.036350490442580645
          entropy_coeff: 0.009999999999999998
          kl: 1.3897326820918845e-05
          policy_loss: -0.01750745375951131
          total_loss: 4066517036509.8667
          vf_explained_var: 0.47983160614967346
          vf_loss: 4066517036509.8667
    num_agent_steps_sampled: 984000
    num_agent_steps_trained: 984000
    num_steps_sampled: 984000
    num_steps_trained: 9840

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,984,23653.9,984000,-1.9941,-1.73,-2.04,199.41




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 985000
  custom_metrics: {}
  date: 2021-10-24_02-35-12
  done: false
  episode_len_mean: 199.41
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9941000000000009
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1928
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.863496723151779e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.06378244111935298
          total_loss: .inf
          vf_explained_var: -1.0728836059570312e-06
          vf_loss: 6912409856.0
    num_agent_steps_sampled: 985000
    num_agent_steps_trained: 985000
    num_steps_sampled: 985000
    num_steps_trained: 985000
  iterations_since_restore: 985
  node_ip: 172.1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,985,23700.1,985000,-1.9941,-1.73,-2.04,199.41


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 986000
  custom_metrics: {}
  date: 2021-10-24_02-36-03
  done: false
  episode_len_mean: 199.66
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.996600000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1933
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.295245084727666e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.0637813111146291
          total_loss: 564147434.6666666
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 564147434.6666666
    num_agent_steps_sampled: 986000
    num_agent_steps_trained: 986000
    num_steps_sampled: 986000
    num_steps_trained: 986000
  iterations_since_restore: 986
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,986,23751.3,986000,-1.9966,-1.73,-2.04,199.66


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 987000
  custom_metrics: {}
  date: 2021-10-24_02-36-54
  done: false
  episode_len_mean: 199.66
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9966000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1938
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.647622542363833e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.06378138810396194
          total_loss: 557630378.6666666
          vf_explained_var: 7.947286206899662e-08
          vf_loss: 557630378.6666666
    num_agent_steps_sampled: 987000
    num_agent_steps_trained: 987000
    num_steps_sampled: 987000
    num_steps_trained: 987000
  iterations_since_restore: 987
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,987,23801.7,987000,-1.9966,-1.73,-2.04,199.66


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 988000
  custom_metrics: {}
  date: 2021-10-24_02-37-45
  done: false
  episode_len_mean: 199.66
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9966000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1943
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8238112711819165e-07
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.0637811745206515
          total_loss: 556745959.8222222
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 556745959.8222222
    num_agent_steps_sampled: 988000
    num_agent_steps_trained: 988000
    num_steps_sampled: 988000
    num_steps_trained: 988000
  iterations_since_restore: 988

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,988,23852.9,988000,-1.9966,-1.73,-2.04,199.66


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 989000
  custom_metrics: {}
  date: 2021-10-24_02-38-34
  done: false
  episode_len_mean: 199.66
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.996600000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1948
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.119056355909583e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.06378125647703807
          total_loss: 555748001.4222223
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 555748001.4222223
    num_agent_steps_sampled: 989000
    num_agent_steps_trained: 989000
    num_steps_sampled: 989000
    num_steps_trained: 989000
  iterations_since_restore: 989
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,989,23901.7,989000,-1.9966,-1.73,-2.04,199.66




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 990000
  custom_metrics: {}
  date: 2021-10-24_02-39-44
  done: false
  episode_len_mean: 199.4
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9940000000000009
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1953
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.559528177954791e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.00038694493058654994
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.02870160589615504
          total_loss: .inf
          vf_explained_var: -0.333333283662796
          vf_loss: 976532361.2444445
    num_agent_steps_sampled: 990000
    num_agent_steps_trained: 990000
    num_steps_sampled: 990000
    num_steps_trained: 990000
  iterations_since_restore: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,990,23971.8,990000,-1.994,-1.73,-2.04,199.4


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 991000
  custom_metrics: {}
  date: 2021-10-24_02-40-34
  done: false
  episode_len_mean: 199.4
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9940000000000009
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1958
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.839292266932184e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03252295404672623
          total_loss: 720824152.8888888
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 720824152.8888888
    num_agent_steps_sampled: 991000
    num_agent_steps_trained: 991000
    num_steps_sampled: 991000
    num_steps_trained: 991000
  iterations_since_restore: 991


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,991,24021.6,991000,-1.994,-1.73,-2.04,199.4


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 992000
  custom_metrics: {}
  date: 2021-10-24_02-41-25
  done: false
  episode_len_mean: 199.67
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9967000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1963
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.419646133466092e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.0325227826833725
          total_loss: 636407241.6
          vf_explained_var: 1.1126200405442432e-07
          vf_loss: 636407241.6
    num_agent_steps_sampled: 992000
    num_agent_steps_trained: 992000
    num_steps_sampled: 992000
    num_steps_trained: 992000
  iterations_since_restore: 992
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,992,24072.5,992000,-1.9967,-1.73,-2.04,199.67


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 993000
  custom_metrics: {}
  date: 2021-10-24_02-42-15
  done: false
  episode_len_mean: 199.67
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9967000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1968
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.709823066733046e-08
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03252298136552175
          total_loss: 634771787.0222223
          vf_explained_var: 1.5894572769070692e-08
          vf_loss: 634771787.0222223
    num_agent_steps_sampled: 993000
    num_agent_steps_trained: 993000
    num_steps_sampled: 993000
    num_steps_trained: 993000
  iterations_since_restore: 993

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,993,24122.3,993000,-1.9967,-1.73,-2.04,199.67


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 994000
  custom_metrics: {}
  date: 2021-10-24_02-43-04
  done: false
  episode_len_mean: 199.67
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9967000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1973
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.54911533366523e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03252282987038294
          total_loss: 633255223.8222222
          vf_explained_var: -5.430645444448601e-08
          vf_loss: 633255223.8222222
    num_agent_steps_sampled: 994000
    num_agent_steps_trained: 994000
    num_steps_sampled: 994000
    num_steps_trained: 994000
  iterations_since_restore: 994


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,994,24172.1,994000,-1.9967,-1.73,-2.04,199.67


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 995000
  custom_metrics: {}
  date: 2021-10-24_02-43-57
  done: false
  episode_len_mean: 199.67
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9967000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1978
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.274557666832615e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03252297888199488
          total_loss: 631644820.2666667
          vf_explained_var: -2.3841858265427618e-08
          vf_loss: 631644820.2666667
    num_agent_steps_sampled: 995000
    num_agent_steps_trained: 995000
    num_steps_sampled: 995000
    num_steps_trained: 995000
  iterations_since_restore: 99

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,995,24224.3,995000,-1.9967,-1.73,-2.04,199.67




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 996000
  custom_metrics: {}
  date: 2021-10-24_02-45-09
  done: false
  episode_len_mean: 199.4
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9940000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1983
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1372788334163075e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.00047802778038506707
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.025362879700130885
          total_loss: .inf
          vf_explained_var: -0.3333333432674408
          vf_loss: 1104084321.4222221
    num_agent_steps_sampled: 996000
    num_agent_steps_trained: 996000
    num_steps_sampled: 996000
    num_steps_trained: 996000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,996,24297,996000,-1.994,-1.73,-2.04,199.4


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 997000
  custom_metrics: {}
  date: 2021-10-24_02-46-00
  done: false
  episode_len_mean: 199.4
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9940000000000009
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1988
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2059182501244616e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03205831597248713
          total_loss: 709283566.9333333
          vf_explained_var: 5.298190774993827e-09
          vf_loss: 709283566.9333333
    num_agent_steps_sampled: 997000
    num_agent_steps_trained: 997000
    num_steps_sampled: 997000
    num_steps_trained: 997000
  iterations_since_restore: 997


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,997,24347.8,997000,-1.994,-1.73,-2.04,199.4


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 998000
  custom_metrics: {}
  date: 2021-10-24_02-46-51
  done: false
  episode_len_mean: 199.58
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.995800000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1993
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6029591250622308e-09
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03205829610427221
          total_loss: 623080729.9555556
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 623080729.9555556
    num_agent_steps_sampled: 998000
    num_agent_steps_trained: 998000
    num_steps_sampled: 998000
    num_steps_trained: 998000
  iterations_since_restore: 998

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,998,24398.8,998000,-1.9958,-1.73,-2.04,199.58


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 999000
  custom_metrics: {}
  date: 2021-10-24_02-47-42
  done: false
  episode_len_mean: 199.54
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.995400000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 1998
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.014795625311154e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03205836067597071
          total_loss: 620546715.0222223
          vf_explained_var: 1.0728835775353218e-07
          vf_loss: 620546715.0222223
    num_agent_steps_sampled: 999000
    num_agent_steps_trained: 999000
    num_steps_sampled: 999000
    num_steps_trained: 999000
  iterations_since_restore: 999


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,999,24449.5,999000,-1.9954,-1.73,-2.04,199.54


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1000000
  custom_metrics: {}
  date: 2021-10-24_02-48-36
  done: false
  episode_len_mean: 199.46
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.994600000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2003
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.007397812655577e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03205838551123937
          total_loss: 618284266.6666666
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 618284266.6666666
    num_agent_steps_sampled: 1000000
    num_agent_steps_trained: 1000000
    num_steps_sampled: 1000000
    num_steps_trained: 1000000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1000,24503.5,1000000,-1.9946,-1.73,-2.04,199.46


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1001000
  custom_metrics: {}
  date: 2021-10-24_02-49-25
  done: false
  episode_len_mean: 199.36
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9936000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2008
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0036989063277885e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03205841283003489
          total_loss: 616024339.5555556
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 616024339.5555556
    num_agent_steps_sampled: 1001000
    num_agent_steps_trained: 1001000
    num_steps_sampled: 1001000
    num_steps_trained: 1001000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1001,24552.7,1001000,-1.9936,-1.73,-2.04,199.36




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1002000
  custom_metrics: {}
  date: 2021-10-24_02-50-36
  done: false
  episode_len_mean: 199.07
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.990700000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2013
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0018494531638943e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.03471084096365505
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.017427750718262462
          total_loss: .inf
          vf_explained_var: -0.6666666865348816
          vf_loss: 117626995889.42223
    num_agent_steps_sampled: 1002000
    num_agent_steps_trained: 1002000
    num_steps_sampled: 1002000
    num_steps_trained: 1002000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1002,24623.3,1002000,-1.9907,-1.73,-2.04,199.07




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1003000
  custom_metrics: {}
  date: 2021-10-24_02-51-26
  done: false
  episode_len_mean: 199.07
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.990700000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2018
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.502774179745842e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.027655117420686617
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.10788360858956973
          total_loss: .inf
          vf_explained_var: 0.04076499491930008
          vf_loss: 2258070001709.511
    num_agent_steps_sampled: 1003000
    num_agent_steps_trained: 1003000
    num_steps_sampled: 1003000
    num_steps_trained: 1003000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1003,24673.8,1003000,-1.9907,-1.73,-2.03,199.07




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1004000
  custom_metrics: {}
  date: 2021-10-24_02-52-26
  done: false
  episode_len_mean: 199.42
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9942000000000006
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2023
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.254161269618762e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.028221609940131506
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.018506408482789994
          total_loss: .inf
          vf_explained_var: -0.6674466729164124
          vf_loss: 95227704376.88889
    num_agent_steps_sampled: 1004000
    num_agent_steps_trained: 1004000
    num_steps_sampled: 1004000
    num_steps_trained: 1004000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1004,24733.1,1004000,-1.9942,-1.73,-2.04,199.42


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1005000
  custom_metrics: {}
  date: 2021-10-24_02-53-18
  done: false
  episode_len_mean: 199.43
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9943000000000013
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2028
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3812419044281456e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.013143366202712059
          entropy_coeff: 0.009999999999999998
          kl: 3.0277262948648814e-08
          policy_loss: 0.03426423172156016
          total_loss: 40426953642.666664
          vf_explained_var: 0.3313864767551422
          vf_loss: 40426953642.666664
    num_agent_steps_sampled: 1005000
    num_agent_steps_trained: 1005000
    num_steps_sampled: 1005000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1005,24785.6,1005000,-1.9943,-1.73,-2.04,199.43


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1006000
  custom_metrics: {}
  date: 2021-10-24_02-54-07
  done: false
  episode_len_mean: 199.43
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9943000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2033
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6906209522140728e-10
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.032034143805503845
          total_loss: 11975032342.755556
          vf_explained_var: 9.139378676081833e-08
          vf_loss: 11975032342.755556
    num_agent_steps_sampled: 1006000
    num_agent_steps_trained: 1006000
    num_steps_sampled: 1006000
    num_steps_trained: 1006000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1006,24834.6,1006000,-1.9943,-1.73,-2.04,199.43


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1007000
  custom_metrics: {}
  date: 2021-10-24_02-55-00
  done: false
  episode_len_mean: 199.43
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9943000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2038
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.453104761070364e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03203414132197698
          total_loss: 746431232.0
          vf_explained_var: 1.390775050680304e-08
          vf_loss: 746431232.0
    num_agent_steps_sampled: 1007000
    num_agent_steps_trained: 1007000
    num_steps_sampled: 1007000
    num_steps_trained: 1007000
  iterations_since_restore: 1007
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1007,24886.9,1007000,-1.9943,-1.73,-2.04,199.43




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1008000
  custom_metrics: {}
  date: 2021-10-24_02-56-07
  done: false
  episode_len_mean: 199.15
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.991500000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2043
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.226552380535182e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.005315089639690187
          entropy_coeff: 0.009999999999999998
          kl: 0.0007367279768610995
          policy_loss: 0.1069622642464108
          total_loss: 13439998361041.777
          vf_explained_var: 0.034932151436805725
          vf_loss: 13439998361041.777
    num_agent_steps_sampled: 1008000
    num_agent_steps_trained: 1008000
    num_steps_sampled: 1008000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1008,24954.1,1008000,-1.9915,-1.72,-2.04,199.15


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1009000
  custom_metrics: {}
  date: 2021-10-24_02-56-59
  done: false
  episode_len_mean: 199.15
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.991500000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2048
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.113276190267591e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03194247931241989
          total_loss: 351460773432.8889
          vf_explained_var: 2.8477774804969158e-08
          vf_loss: 351460773432.8889
    num_agent_steps_sampled: 1009000
    num_agent_steps_trained: 1009000
    num_steps_sampled: 1009000
    num_steps_trained: 1009000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1009,25006.2,1009000,-1.9915,-1.72,-2.04,199.15


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1010000
  custom_metrics: {}
  date: 2021-10-24_02-57-47
  done: false
  episode_len_mean: 199.41
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9941000000000009
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2053
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0566380951337955e-11
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03194225331147512
          total_loss: 886649044.6222222
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 886649044.6222222
    num_agent_steps_sampled: 1010000
    num_agent_steps_trained: 1010000
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1010,25054.5,1010000,-1.9941,-1.72,-2.04,199.41


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1011000
  custom_metrics: {}
  date: 2021-10-24_02-58-40
  done: false
  episode_len_mean: 199.41
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.994100000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2058
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.2831904756689776e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031942332784334816
          total_loss: 887400452.9777777
          vf_explained_var: 0.0
          vf_loss: 887400452.9777777
    num_agent_steps_sampled: 1011000
    num_agent_steps_trained: 1011000
    num_steps_sampled: 1011000
    num_steps_trained: 1011000
  iterations_since_restore: 1011
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1011,25106.8,1011000,-1.9941,-1.72,-2.04,199.41


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1012000
  custom_metrics: {}
  date: 2021-10-24_02-59-29
  done: false
  episode_len_mean: 199.41
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9941000000000009
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2063
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6415952378344888e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03194233775138855
          total_loss: 886406875.7333333
          vf_explained_var: 0.0
          vf_loss: 886406875.7333333
    num_agent_steps_sampled: 1012000
    num_agent_steps_trained: 1012000
    num_steps_sampled: 1012000
    num_steps_trained: 1012000
  iterations_since_restore: 1012
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1012,25155.8,1012000,-1.9941,-1.72,-2.04,199.41


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1013000
  custom_metrics: {}
  date: 2021-10-24_03-00-19
  done: false
  episode_len_mean: 199.41
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.994100000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2068
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3207976189172444e-12
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03194237252076467
          total_loss: 885437001.2444445
          vf_explained_var: -3.311369312086754e-08
          vf_loss: 885437001.2444445
    num_agent_steps_sampled: 1013000
    num_agent_steps_trained: 1013000
    num_steps_sampled: 1013000
    num_steps_trained: 1013000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1013,25206.4,1013000,-1.9941,-1.72,-2.04,199.41




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1014000
  custom_metrics: {}
  date: 2021-10-24_03-01-27
  done: false
  episode_len_mean: 199.11
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9911000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2073
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.603988094586222e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.0006766593368310067
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.01961633563041687
          total_loss: .inf
          vf_explained_var: -0.3333333432674408
          vf_loss: 1552268218.3111112
    num_agent_steps_sampled: 1014000
    num_agent_steps_trained: 1014000
    num_steps_sampled: 1014000
    num_steps_trained: 1014000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1014,25274.4,1014000,-1.9911,-1.7,-2.04,199.11


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1015000
  custom_metrics: {}
  date: 2021-10-24_03-02-19
  done: false
  episode_len_mean: 199.11
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.991100000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2078
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.905982141879325e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.0317767858505249
          total_loss: 1085661156.9777777
          vf_explained_var: -4.238552619995062e-08
          vf_loss: 1085661156.9777777
    num_agent_steps_sampled: 1015000
    num_agent_steps_trained: 1015000
    num_steps_sampled: 1015000
    num_steps_trained: 1015000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1015,25325.8,1015000,-1.9911,-1.7,-2.04,199.11


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1016000
  custom_metrics: {}
  date: 2021-10-24_03-03-08
  done: false
  episode_len_mean: 199.38
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.993800000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2083
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.952991070939663e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03177682558695475
          total_loss: 884372878.2222222
          vf_explained_var: -4.238552619995062e-08
          vf_loss: 884372878.2222222
    num_agent_steps_sampled: 1016000
    num_agent_steps_trained: 1016000
    num_steps_sampled: 1016000
    num_steps_trained: 1016000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1016,25374.6,1016000,-1.9938,-1.7,-2.04,199.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1017000
  custom_metrics: {}
  date: 2021-10-24_03-03-59
  done: false
  episode_len_mean: 199.38
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9938000000000011
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2088
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4764955354698313e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03177667657534281
          total_loss: 883023013.6888889
          vf_explained_var: 5.960464477539063e-08
          vf_loss: 883023013.6888889
    num_agent_steps_sampled: 1017000
    num_agent_steps_trained: 1017000
    num_steps_sampled: 1017000
    num_steps_trained: 1017000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1017,25426,1017000,-1.9938,-1.7,-2.04,199.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1018000
  custom_metrics: {}
  date: 2021-10-24_03-04-50
  done: false
  episode_len_mean: 199.38
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9938000000000011
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2093
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2382477677349157e-13
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03177680323521296
          total_loss: 881782852.2666667
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 881782852.2666667
    num_agent_steps_sampled: 1018000
    num_agent_steps_trained: 1018000
    num_steps_sampled: 1018000
    num_steps_trained: 1018000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1018,25477.2,1018000,-1.9938,-1.7,-2.04,199.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1019000
  custom_metrics: {}
  date: 2021-10-24_03-05-39
  done: false
  episode_len_mean: 199.38
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9938000000000011
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2098
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.191238838674578e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031776765982309975
          total_loss: 880527351.4666667
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 880527351.4666667
    num_agent_steps_sampled: 1019000
    num_agent_steps_trained: 1019000
    num_steps_sampled: 1019000
    num_steps_trained: 1019000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1019,25526.2,1019000,-1.9938,-1.7,-2.04,199.38




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1020000
  custom_metrics: {}
  date: 2021-10-24_03-06-46
  done: false
  episode_len_mean: 199.1
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.991000000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2103
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.095619419337289e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.0006272374865754197
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.02101679758893119
          total_loss: .inf
          vf_explained_var: -0.6138945817947388
          vf_loss: 1709788979.911111
    num_agent_steps_sampled: 1020000
    num_agent_steps_trained: 1020000
    num_steps_sampled: 1020000
    num_steps_trained: 1020000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1020,25592.8,1020000,-1.991,-1.7,-2.04,199.1


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1021000
  custom_metrics: {}
  date: 2021-10-24_03-07-35
  done: false
  episode_len_mean: 199.1
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9910000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2108
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.6434291290059353e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.030981796483198803
          total_loss: 1025803256.8888888
          vf_explained_var: -1.841121246570765e-07
          vf_loss: 1025803256.8888888
    num_agent_steps_sampled: 1021000
    num_agent_steps_trained: 1021000
    num_steps_sampled: 1021000
    num_steps_trained: 1021000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1021,25641.7,1021000,-1.991,-1.7,-2.04,199.1


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1022000
  custom_metrics: {}
  date: 2021-10-24_03-08-25
  done: false
  episode_len_mean: 199.33
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.993300000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2113
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3217145645029677e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03098175177971522
          total_loss: 867433661.8666667
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 867433661.8666667
    num_agent_steps_sampled: 1022000
    num_agent_steps_trained: 1022000
    num_steps_sampled: 1022000
    num_steps_trained: 1022000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1022,25691.5,1022000,-1.9933,-1.7,-2.04,199.33


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1023000
  custom_metrics: {}
  date: 2021-10-24_03-09-13
  done: false
  episode_len_mean: 199.25
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9925000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2118
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1608572822514838e-14
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.030981833736101787
          total_loss: 865748850.4888889
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 865748850.4888889
    num_agent_steps_sampled: 1023000
    num_agent_steps_trained: 1023000
    num_steps_sampled: 1023000
    num_steps_trained: 1023000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1023,25740.3,1023000,-1.9925,-1.7,-2.04,199.25


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1024000
  custom_metrics: {}
  date: 2021-10-24_03-10-05
  done: false
  episode_len_mean: 199.15
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.991500000000001
  episode_reward_min: -2.010000000000001
  episodes_this_iter: 5
  episodes_total: 2123
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.804286411257419e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03098176419734955
          total_loss: 864208964.9777777
          vf_explained_var: -1.3245476937484568e-09
          vf_loss: 864208964.9777777
    num_agent_steps_sampled: 1024000
    num_agent_steps_trained: 1024000
    num_steps_sampled: 1024000
    num_steps_trained: 1024000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1024,25791.8,1024000,-1.9915,-1.7,-2.01,199.15


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1025000
  custom_metrics: {}
  date: 2021-10-24_03-10-53
  done: false
  episode_len_mean: 199.14
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.991400000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2128
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9021432056287096e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03098185857137044
          total_loss: 862618054.4
          vf_explained_var: -1.1920928955078125e-07
          vf_loss: 862618054.4
    num_agent_steps_sampled: 1025000
    num_agent_steps_trained: 1025000
    num_steps_sampled: 1025000
    num_steps_trained: 1025000
  iterations_since_restore: 1025
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1025,25839.8,1025000,-1.9914,-1.7,-2,199.14




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1026000
  custom_metrics: {}
  date: 2021-10-24_03-12-04
  done: false
  episode_len_mean: 198.87
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9887000000000008
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2133
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4510716028143548e-15
          cur_lr: 5.000000000000001e-05
          entropy: 0.01967835030130421
          entropy_coeff: 0.009999999999999998
          kl: 0.00281785381925551
          policy_loss: 0.07840816775957743
          total_loss: 27788342978878.58
          vf_explained_var: 0.12187132239341736
          vf_loss: 27788342978878.58
    num_agent_steps_sampled: 1026000
    num_agent_steps_trained: 1026000
    num_steps_sampled: 1026000
    num_steps_trained: 10260

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1026,25910.8,1026000,-1.9887,-1.7,-2,198.87


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1027000
  custom_metrics: {}
  date: 2021-10-24_03-12-54
  done: false
  episode_len_mean: 198.87
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9887000000000008
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2138
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.255358014071774e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.027770206332206726
          total_loss: 1213667385161.9556
          vf_explained_var: 4.238552619995062e-08
          vf_loss: 1213667385161.9556
    num_agent_steps_sampled: 1027000
    num_agent_steps_trained: 1027000
    num_steps_sampled: 1027000
    num_steps_trained: 1027000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1027,25961.2,1027000,-1.9887,-1.7,-2,198.87


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1028000
  custom_metrics: {}
  date: 2021-10-24_03-13-47
  done: false
  episode_len_mean: 199.15
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.991500000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2143
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.627679007035887e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.02777010202407837
          total_loss: 1077941098.6666667
          vf_explained_var: 5.960464477539063e-08
          vf_loss: 1077941098.6666667
    num_agent_steps_sampled: 1028000
    num_agent_steps_trained: 1028000
    num_steps_sampled: 1028000
    num_steps_trained: 1028000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1028,26013.4,1028000,-1.9915,-1.7,-2,199.15


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1029000
  custom_metrics: {}
  date: 2021-10-24_03-14-35
  done: false
  episode_len_mean: 199.15
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.991500000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2148
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8138395035179435e-16
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.027770159145196278
          total_loss: 1066593286.4
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 1066593286.4
    num_agent_steps_sampled: 1029000
    num_agent_steps_trained: 1029000
    num_steps_sampled: 1029000
    num_steps_trained: 1029000
  iterations_since_restore: 1029
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1029,26061.8,1029000,-1.9915,-1.7,-2,199.15


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1030000
  custom_metrics: {}
  date: 2021-10-24_03-15-25
  done: false
  episode_len_mean: 199.15
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.991500000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2153
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.069197517589717e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.027770241101582844
          total_loss: 1065144381.8666667
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 1065144381.8666667
    num_agent_steps_sampled: 1030000
    num_agent_steps_trained: 1030000
    num_steps_sampled: 1030000
    num_steps_trained: 1030000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1030,26112,1030000,-1.9915,-1.7,-2,199.15


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1031000
  custom_metrics: {}
  date: 2021-10-24_03-16-15
  done: false
  episode_len_mean: 199.15
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.991500000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2158
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.534598758794859e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.027770062287648518
          total_loss: 1063828908.8
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 1063828908.8
    num_agent_steps_sampled: 1031000
    num_agent_steps_trained: 1031000
    num_steps_sampled: 1031000
    num_steps_trained: 1031000
  iterations_since_restore: 1031
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1031,26161.2,1031000,-1.9915,-1.7,-2,199.15




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1032000
  custom_metrics: {}
  date: 2021-10-24_03-17-22
  done: false
  episode_len_mean: 198.92
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9892000000000007
  episode_reward_min: -2.020000000000001
  episodes_this_iter: 6
  episodes_total: 2164
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2672993793974294e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.025711933964826943
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.18960586968395446
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 115251211184.35556
    num_agent_steps_sampled: 1032000
    num_agent_steps_trained: 1032000
    num_steps_sampled: 1032000
    num_steps_trained: 1032000
  iterations_since_restore: 1032
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1032,26228.7,1032000,-1.9892,-1.7,-2.02,198.92




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1033000
  custom_metrics: {}
  date: 2021-10-24_03-18-11
  done: false
  episode_len_mean: 198.95
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9895000000000007
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 4
  episodes_total: 2168
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.400949069096142e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.017396768782701758
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.002739699236634705
          total_loss: .inf
          vf_explained_var: -0.6670004725456238
          vf_loss: 6209103405124.267
    num_agent_steps_sampled: 1033000
    num_agent_steps_trained: 1033000
    num_steps_sampled: 1033000
    num_steps_trained: 1033000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1033,26277.3,1033000,-1.9895,-1.7,-2.03,198.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1034000
  custom_metrics: {}
  date: 2021-10-24_03-19-02
  done: false
  episode_len_mean: 199.26
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9926000000000008
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2173
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.1014236036442143e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.0003864005311495728
          entropy_coeff: 0.009999999999999998
          kl: 9.02199860498677e-10
          policy_loss: -0.01885822270479467
          total_loss: 7543623492.266666
          vf_explained_var: 0.17207524180412292
          vf_loss: 7543623492.266666
    num_agent_steps_sampled: 1034000
    num_agent_steps_trained: 1034000
    num_steps_sampled: 1034000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1034,26328.7,1034000,-1.9926,-1.72,-2.03,199.26


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1035000
  custom_metrics: {}
  date: 2021-10-24_03-19-52
  done: false
  episode_len_mean: 199.26
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9926000000000008
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2178
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5507118018221072e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.02709435671567917
          total_loss: 1543676275.2
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 1543676275.2
    num_agent_steps_sampled: 1035000
    num_agent_steps_trained: 1035000
    num_steps_sampled: 1035000
    num_steps_trained: 1035000
  iterations_since_restore: 1035
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1035,26378.2,1035000,-1.9926,-1.72,-2.03,199.26


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1036000
  custom_metrics: {}
  date: 2021-10-24_03-20-40
  done: false
  episode_len_mean: 199.26
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.992600000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2183
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2753559009110536e-17
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.027094386518001556
          total_loss: 588767690.6666666
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 588767690.6666666
    num_agent_steps_sampled: 1036000
    num_agent_steps_trained: 1036000
    num_steps_sampled: 1036000
    num_steps_trained: 1036000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1036,26426.2,1036000,-1.9926,-1.72,-2.03,199.26


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1037000
  custom_metrics: {}
  date: 2021-10-24_03-21-31
  done: false
  episode_len_mean: 199.26
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.992600000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2188
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.376779504555268e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.027094221363464992
          total_loss: 586862602.3111111
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 586862602.3111111
    num_agent_steps_sampled: 1037000
    num_agent_steps_trained: 1037000
    num_steps_sampled: 1037000
    num_steps_trained: 1037000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1037,26477.7,1037000,-1.9926,-1.72,-2.03,199.26




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1038000
  custom_metrics: {}
  date: 2021-10-24_03-22-40
  done: false
  episode_len_mean: 198.99
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9899000000000007
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 6
  episodes_total: 2194
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.188389752277634e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.0002898022409580234
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0020950225492318473
          total_loss: .inf
          vf_explained_var: -0.31511190533638
          vf_loss: 2103148652.088889
    num_agent_steps_sampled: 1038000
    num_agent_steps_trained: 1038000
    num_steps_sampled: 1038000
    num_steps_trained: 1038000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1038,26546.1,1038000,-1.9899,-1.72,-2.03,198.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1039000
  custom_metrics: {}
  date: 2021-10-24_03-23-31
  done: false
  episode_len_mean: 198.99
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9899000000000007
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2199
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.782584628416453e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.015791185200214386
          total_loss: 1116544987.0222223
          vf_explained_var: -5.298190686175985e-08
          vf_loss: 1116544987.0222223
    num_agent_steps_sampled: 1039000
    num_agent_steps_trained: 1039000
    num_steps_sampled: 1039000
    num_steps_trained: 1039000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1039,26597.4,1039000,-1.9899,-1.72,-2.03,198.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1040000
  custom_metrics: {}
  date: 2021-10-24_03-24-19
  done: false
  episode_len_mean: 199.27
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.992700000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2204
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3912923142082264e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.015791237354278564
          total_loss: 1009872187.7333333
          vf_explained_var: 4.8345988545861474e-08
          vf_loss: 1009872187.7333333
    num_agent_steps_sampled: 1040000
    num_agent_steps_trained: 1040000
    num_steps_sampled: 1040000
    num_steps_trained: 1040000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1040,26645,1040000,-1.9927,-1.73,-2.03,199.27


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1041000
  custom_metrics: {}
  date: 2021-10-24_03-25-08
  done: false
  episode_len_mean: 199.27
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.992700000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2209
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1956461571041132e-18
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.01579116036494573
          total_loss: 1004566382.9333333
          vf_explained_var: -1.3907749973895989e-07
          vf_loss: 1004566382.9333333
    num_agent_steps_sampled: 1041000
    num_agent_steps_trained: 1041000
    num_steps_sampled: 1041000
    num_steps_trained: 1041000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1041,26694.3,1041000,-1.9927,-1.73,-2.03,199.27


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1042000
  custom_metrics: {}
  date: 2021-10-24_03-25-58
  done: false
  episode_len_mean: 199.27
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.992700000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2214
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.978230785520566e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.01579124977191289
          total_loss: 999031450.3111111
          vf_explained_var: 3.973643192267673e-09
          vf_loss: 999031450.3111111
    num_agent_steps_sampled: 1042000
    num_agent_steps_trained: 1042000
    num_steps_sampled: 1042000
    num_steps_trained: 1042000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1042,26744.1,1042000,-1.9927,-1.73,-2.03,199.27


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1043000
  custom_metrics: {}
  date: 2021-10-24_03-26-48
  done: false
  episode_len_mean: 199.27
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.992700000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2219
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.989115392760283e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.01579117774963379
          total_loss: 994688698.3111111
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 994688698.3111111
    num_agent_steps_sampled: 1043000
    num_agent_steps_trained: 1043000
    num_steps_sampled: 1043000
    num_steps_trained: 1043000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1043,26794.2,1043000,-1.9927,-1.73,-2.03,199.27




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1044000
  custom_metrics: {}
  date: 2021-10-24_03-27-57
  done: false
  episode_len_mean: 198.99
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.989900000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2224
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4945576963801415e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.0004829375254404214
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.024577044281694625
          total_loss: .inf
          vf_explained_var: -0.3333333432674408
          vf_loss: 2742072568.177778
    num_agent_steps_sampled: 1044000
    num_agent_steps_trained: 1044000
    num_steps_sampled: 1044000
    num_steps_trained: 1044000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1044,26863.5,1044000,-1.9899,-1.72,-2.03,198.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1045000
  custom_metrics: {}
  date: 2021-10-24_03-28-46
  done: false
  episode_len_mean: 198.99
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.989900000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2229
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2418365445702115e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03213886916637421
          total_loss: 1199867696.3555555
          vf_explained_var: 0.0
          vf_loss: 1199867696.3555555
    num_agent_steps_sampled: 1045000
    num_agent_steps_trained: 1045000
    num_steps_sampled: 1045000
    num_steps_trained: 1045000
  iterations_since_restore: 1045
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1045,26912.8,1045000,-1.9899,-1.72,-2.03,198.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1046000
  custom_metrics: {}
  date: 2021-10-24_03-29-37
  done: false
  episode_len_mean: 199.26
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.992600000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2234
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1209182722851058e-19
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03213891635338465
          total_loss: 1076514939.7333333
          vf_explained_var: 0.0
          vf_loss: 1076514939.7333333
    num_agent_steps_sampled: 1046000
    num_agent_steps_trained: 1046000
    num_steps_sampled: 1046000
    num_steps_trained: 1046000
  iterations_since_restore: 1046
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1046,26962.9,1046000,-1.9926,-1.72,-2.03,199.26


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1047000
  custom_metrics: {}
  date: 2021-10-24_03-30-24
  done: false
  episode_len_mean: 199.26
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.992600000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2239
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.604591361425529e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03213891635338465
          total_loss: 1071582468.2666667
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 1071582468.2666667
    num_agent_steps_sampled: 1047000
    num_agent_steps_trained: 1047000
    num_steps_sampled: 1047000
    num_steps_trained: 1047000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1047,27010.4,1047000,-1.9926,-1.72,-2.03,199.26


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1048000
  custom_metrics: {}
  date: 2021-10-24_03-31-16
  done: false
  episode_len_mean: 199.26
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9926000000000008
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2244
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8022956807127644e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03213890145222346
          total_loss: 1066752623.6444445
          vf_explained_var: -5.430645444448601e-08
          vf_loss: 1066752623.6444445
    num_agent_steps_sampled: 1048000
    num_agent_steps_trained: 1048000
    num_steps_sampled: 1048000
    num_steps_trained: 1048000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1048,27062.6,1048000,-1.9926,-1.72,-2.03,199.26


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1049000
  custom_metrics: {}
  date: 2021-10-24_03-32-04
  done: false
  episode_len_mean: 199.26
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.992600000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2249
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4011478403563822e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03213890145222346
          total_loss: 1061934924.8
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 1061934924.8
    num_agent_steps_sampled: 1049000
    num_agent_steps_trained: 1049000
    num_steps_sampled: 1049000
    num_steps_trained: 1049000
  iterations_since_restore: 1049
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1049,27110.5,1049000,-1.9926,-1.72,-2.03,199.26




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1050000
  custom_metrics: {}
  date: 2021-10-24_03-33-14
  done: false
  episode_len_mean: 198.99
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.989900000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2254
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.005739201781911e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.00028981901834615405
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.02621104315751129
          total_loss: .inf
          vf_explained_var: -0.3237621784210205
          vf_loss: 3759114355.2
    num_agent_steps_sampled: 1050000
    num_agent_steps_trained: 1050000
    num_steps_sampled: 1050000
    num_steps_trained: 1050000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1050,27180.2,1050000,-1.9899,-1.72,-2.03,198.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1051000
  custom_metrics: {}
  date: 2021-10-24_03-34-08
  done: false
  episode_len_mean: 198.99
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9899000000000007
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2259
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0508608802672867e-20
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03197921315828959
          total_loss: 1149498453.3333333
          vf_explained_var: -2.2517310682701464e-08
          vf_loss: 1149498453.3333333
    num_agent_steps_sampled: 1051000
    num_agent_steps_trained: 1051000
    num_steps_sampled: 1051000
    num_steps_trained: 1051000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1051,27233.9,1051000,-1.9899,-1.72,-2.03,198.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1052000
  custom_metrics: {}
  date: 2021-10-24_03-34-57
  done: false
  episode_len_mean: 199.22
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9922000000000009
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2264
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.254304401336434e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031979178388913475
          total_loss: 1046615904.0
          vf_explained_var: 2.6490953874969136e-09
          vf_loss: 1046615904.0
    num_agent_steps_sampled: 1052000
    num_agent_steps_trained: 1052000
    num_steps_sampled: 1052000
    num_steps_trained: 1052000
  iterations_since_restore: 1052
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1052,27283.6,1052000,-1.9922,-1.72,-2.03,199.22


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1053000
  custom_metrics: {}
  date: 2021-10-24_03-35-45
  done: false
  episode_len_mean: 199.18
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.991800000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2269
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.627152200668217e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031979297598203026
          total_loss: 1041567063.4666667
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 1041567063.4666667
    num_agent_steps_sampled: 1053000
    num_agent_steps_trained: 1053000
    num_steps_sampled: 1053000
    num_steps_trained: 1053000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1053,27331.3,1053000,-1.9918,-1.72,-2,199.18


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1054000
  custom_metrics: {}
  date: 2021-10-24_03-36-38
  done: false
  episode_len_mean: 199.18
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.991800000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2274
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3135761003341084e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031979200740655266
          total_loss: 1036547030.0444444
          vf_explained_var: -9.404288192627064e-08
          vf_loss: 1036547030.0444444
    num_agent_steps_sampled: 1054000
    num_agent_steps_trained: 1054000
    num_steps_sampled: 1054000
    num_steps_trained: 1054000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1054,27383.7,1054000,-1.9918,-1.72,-2,199.18


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1055000
  custom_metrics: {}
  date: 2021-10-24_03-37-29
  done: false
  episode_len_mean: 199.18
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.991800000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2279
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.567880501670542e-22
          cur_lr: 5.000000000000001e-05
          entropy: 1.4539183703749333e-30
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031979347268740334
          total_loss: 1031417663.2888889
          vf_explained_var: 1.7219120351796846e-08
          vf_loss: 1031417663.2888889
    num_agent_steps_sampled: 1055000
    num_agent_steps_trained: 1055000
    num_steps_sampled: 1055000
    num_steps_trained: 1055000
  ite

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1055,27435.3,1055000,-1.9918,-1.72,-2,199.18




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1056000
  custom_metrics: {}
  date: 2021-10-24_03-38-44
  done: false
  episode_len_mean: 198.89
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.988900000000001
  episode_reward_min: -2.010000000000001
  episodes_this_iter: 5
  episodes_total: 2284
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.283940250835271e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.02697581412333031
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.026839943919747408
          total_loss: .inf
          vf_explained_var: -0.6667306423187256
          vf_loss: 38107333944.888885
    num_agent_steps_sampled: 1056000
    num_agent_steps_trained: 1056000
    num_steps_sampled: 1056000
    num_steps_trained: 1056000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1056,27509.9,1056000,-1.9889,-1.7,-2.01,198.89




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1057000
  custom_metrics: {}
  date: 2021-10-24_03-39-40
  done: false
  episode_len_mean: 198.96
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.989600000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2289
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.925910376252906e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.03835490032409628
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.09051458504464892
          total_loss: .inf
          vf_explained_var: -0.4077955484390259
          vf_loss: 956125924565.3334
    num_agent_steps_sampled: 1057000
    num_agent_steps_trained: 1057000
    num_steps_sampled: 1057000
    num_steps_trained: 1057000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1057,27566.2,1057000,-1.9896,-1.7,-2.03,198.96




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1058000
  custom_metrics: {}
  date: 2021-10-24_03-40-31
  done: false
  episode_len_mean: 199.32
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.993200000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2294
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.388865564379361e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.05241975440747208
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.051925430612431635
          total_loss: .inf
          vf_explained_var: 0.16507433354854584
          vf_loss: 832380886402.8445
    num_agent_steps_sampled: 1058000
    num_agent_steps_trained: 1058000
    num_steps_sampled: 1058000
    num_steps_trained: 1058000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1058,27617.5,1058000,-1.9932,-1.7,-2.03,199.32




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1059000
  custom_metrics: {}
  date: 2021-10-24_03-41-26
  done: false
  episode_len_mean: 199.4
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9940000000000009
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2299
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.108329834656904e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.06178828592722615
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.08360972321695752
          total_loss: .inf
          vf_explained_var: -0.014582360163331032
          vf_loss: 75751690347378.86
    num_agent_steps_sampled: 1059000
    num_agent_steps_trained: 1059000
    num_steps_sampled: 1059000
    num_steps_trained: 1059000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1059,27671.9,1059000,-1.994,-1.7,-2.04,199.4




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1060000
  custom_metrics: {}
  date: 2021-10-24_03-42-22
  done: false
  episode_len_mean: 199.49
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9949000000000006
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2304
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6624947519853563e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.04531733255005545
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.0705600639184316
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 3873796808157.8667
    num_agent_steps_sampled: 1060000
    num_agent_steps_trained: 1060000
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
  iterations_since_restore: 1060
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1060,27728.4,1060000,-1.9949,-1.7,-2.04,199.49


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1061000
  custom_metrics: {}
  date: 2021-10-24_03-43-18
  done: false
  episode_len_mean: 199.57
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9957000000000011
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2309
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.493742127978033e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.054511849458018936
          entropy_coeff: 0.009999999999999998
          kl: -6.744285842636484e-08
          policy_loss: -0.0247271572964059
          total_loss: 1878863309118.578
          vf_explained_var: 0.1996201127767563
          vf_loss: 1878863309118.578
    num_agent_steps_sampled: 1061000
    num_agent_steps_trained: 1061000
    num_steps_sampled: 1061000
    num_steps_trained: 106

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1061,27784.1,1061000,-1.9957,-1.7,-2.04,199.57




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1062000
  custom_metrics: {}
  date: 2021-10-24_03-44-30
  done: false
  episode_len_mean: 199.34
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.993400000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2314
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2468710639890165e-21
          cur_lr: 5.000000000000001e-05
          entropy: 0.010685817193653848
          entropy_coeff: 0.009999999999999998
          kl: -1.67903006313234e-09
          policy_loss: 0.018082746532228258
          total_loss: 199733256760.8889
          vf_explained_var: 0.30460694432258606
          vf_loss: 199733256760.8889
    num_agent_steps_sampled: 1062000
    num_agent_steps_trained: 1062000
    num_steps_sampled: 1062000
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1062,27856.1,1062000,-1.9934,-1.7,-2.04,199.34


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1063000
  custom_metrics: {}
  date: 2021-10-24_03-45-19
  done: false
  episode_len_mean: 199.34
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9934000000000012
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2319
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.2343553199450825e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03195023536682129
          total_loss: 105840298393.6
          vf_explained_var: -1.3245476937484568e-09
          vf_loss: 105840298393.6
    num_agent_steps_sampled: 1063000
    num_agent_steps_trained: 1063000
    num_steps_sampled: 1063000
    num_steps_trained: 1063000
  iterations_since_restore: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1063,27905,1063000,-1.9934,-1.7,-2.04,199.34


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1064000
  custom_metrics: {}
  date: 2021-10-24_03-46-14
  done: false
  episode_len_mean: 199.62
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.996200000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2324
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1171776599725413e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031950252751509346
          total_loss: 1710917290.6666667
          vf_explained_var: 0.0
          vf_loss: 1710917290.6666667
    num_agent_steps_sampled: 1064000
    num_agent_steps_trained: 1064000
    num_steps_sampled: 1064000
    num_steps_trained: 1064000
  iterations_since_restore: 1064
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1064,27959.9,1064000,-1.9962,-1.7,-2.04,199.62


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1065000
  custom_metrics: {}
  date: 2021-10-24_03-47-03
  done: false
  episode_len_mean: 199.62
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.996200000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2329
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5585888299862706e-22
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03195016582806905
          total_loss: 1707778958.2222223
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 1707778958.2222223
    num_agent_steps_sampled: 1065000
    num_agent_steps_trained: 1065000
    num_steps_sampled: 1065000
    num_steps_trained: 1065000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1065,28008.9,1065000,-1.9962,-1.7,-2.04,199.62


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1066000
  custom_metrics: {}
  date: 2021-10-24_03-47-55
  done: false
  episode_len_mean: 199.62
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.996200000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2334
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.792944149931353e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03195019066333771
          total_loss: 1706060212.6222222
          vf_explained_var: -1.1920928955078125e-07
          vf_loss: 1706060212.6222222
    num_agent_steps_sampled: 1066000
    num_agent_steps_trained: 1066000
    num_steps_sampled: 1066000
    num_steps_trained: 1066000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1066,28060.4,1066000,-1.9962,-1.7,-2.04,199.62


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1067000
  custom_metrics: {}
  date: 2021-10-24_03-48-46
  done: false
  episode_len_mean: 199.62
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9962000000000009
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2339
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.8964720749656766e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03195024778445562
          total_loss: 1704451212.8
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 1704451212.8
    num_agent_steps_sampled: 1067000
    num_agent_steps_trained: 1067000
    num_steps_sampled: 1067000
    num_steps_trained: 1067000
  iterations_since_restore: 1067
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1067,28112.2,1067000,-1.9962,-1.7,-2.04,199.62




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1068000
  custom_metrics: {}
  date: 2021-10-24_03-49-56
  done: false
  episode_len_mean: 199.35
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9935000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2344
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9482360374828383e-23
          cur_lr: 5.000000000000001e-05
          entropy: 0.008941194228827953
          entropy_coeff: 0.009999999999999998
          kl: 0.0013641224967108833
          policy_loss: 0.018099924094147152
          total_loss: 79854718978184.53
          vf_explained_var: 0.05600706860423088
          vf_loss: 79854718978184.53
    num_agent_steps_sampled: 1068000
    num_agent_steps_trained: 1068000
    num_steps_sampled: 1068000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1068,28181.4,1068000,-1.9935,-1.7,-2.04,199.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1069000
  custom_metrics: {}
  date: 2021-10-24_03-50-45
  done: false
  episode_len_mean: 199.35
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9935000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2349
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.741180187414191e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031833797693252563
          total_loss: 142627543631.64444
          vf_explained_var: -3.7087335869046e-08
          vf_loss: 142627543631.64444
    num_agent_steps_sampled: 1069000
    num_agent_steps_trained: 1069000
    num_steps_sampled: 1069000
    num_steps_trained: 1069000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1069,28230.7,1069000,-1.9935,-1.7,-2.04,199.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1070000
  custom_metrics: {}
  date: 2021-10-24_03-51-34
  done: false
  episode_len_mean: 199.62
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9962000000000006
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2354
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.870590093707096e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03183359652757645
          total_loss: 1889634285.511111
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 1889634285.511111
    num_agent_steps_sampled: 1070000
    num_agent_steps_trained: 1070000
    num_steps_sampled: 1070000
    num_steps_trained: 1070000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1070,28279.6,1070000,-1.9962,-1.7,-2.04,199.62


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1071000
  custom_metrics: {}
  date: 2021-10-24_03-52-27
  done: false
  episode_len_mean: 199.62
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9962000000000009
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2359
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.435295046853548e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031833671033382416
          total_loss: 1888989779.911111
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 1888989779.911111
    num_agent_steps_sampled: 1071000
    num_agent_steps_trained: 1071000
    num_steps_sampled: 1071000
    num_steps_trained: 1071000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1071,28333.1,1071000,-1.9962,-1.7,-2.04,199.62


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1072000
  custom_metrics: {}
  date: 2021-10-24_03-53-17
  done: false
  episode_len_mean: 199.62
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9962000000000009
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2364
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.217647523426774e-24
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03183363129695257
          total_loss: 1888725769.9555554
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 1888725769.9555554
    num_agent_steps_sampled: 1072000
    num_agent_steps_trained: 1072000
    num_steps_sampled: 1072000
    num_steps_trained: 1072000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1072,28382.5,1072000,-1.9962,-1.7,-2.04,199.62


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1073000
  custom_metrics: {}
  date: 2021-10-24_03-54-09
  done: false
  episode_len_mean: 199.62
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9962000000000009
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2369
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.08823761713387e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03183365116516749
          total_loss: 1888453734.4
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 1888453734.4
    num_agent_steps_sampled: 1073000
    num_agent_steps_trained: 1073000
    num_steps_sampled: 1073000
    num_steps_trained: 1073000
  iterations_since_restore: 1073
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1073,28434.2,1073000,-1.9962,-1.7,-2.04,199.62




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1074000
  custom_metrics: {}
  date: 2021-10-24_03-55-19
  done: false
  episode_len_mean: 199.35
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9935000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2374
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.044118808566935e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.00045836384589266444
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.01675016565455331
          total_loss: .inf
          vf_explained_var: -0.3333333134651184
          vf_loss: 3667996231.111111
    num_agent_steps_sampled: 1074000
    num_agent_steps_trained: 1074000
    num_steps_sampled: 1074000
    num_steps_trained: 1074000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1074,28504.7,1074000,-1.9935,-1.7,-2.04,199.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1075000
  custom_metrics: {}
  date: 2021-10-24_03-56-10
  done: false
  episode_len_mean: 199.35
  episode_media: {}
  episode_reward_max: -1.7000000000000013
  episode_reward_mean: -1.9935000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2379
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.566178212850403e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03131162375211716
          total_loss: 2374569484.8
          vf_explained_var: 1.5894572769070692e-08
          vf_loss: 2374569484.8
    num_agent_steps_sampled: 1075000
    num_agent_steps_trained: 1075000
    num_steps_sampled: 1075000
    num_steps_trained: 1075000
  iterations_since_restore: 1075
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1075,28555.3,1075000,-1.9935,-1.7,-2.04,199.35


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1076000
  custom_metrics: {}
  date: 2021-10-24_03-56-57
  done: false
  episode_len_mean: 199.64
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9964000000000006
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2384
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2830891064252016e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031311814983685814
          total_loss: 1882501091.5555556
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 1882501091.5555556
    num_agent_steps_sampled: 1076000
    num_agent_steps_trained: 1076000
    num_steps_sampled: 1076000
    num_steps_trained: 1076000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1076,28603.1,1076000,-1.9964,-1.73,-2.04,199.64


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1077000
  custom_metrics: {}
  date: 2021-10-24_03-57-46
  done: false
  episode_len_mean: 199.57
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9957000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2389
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1415445532126008e-25
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03131175537904104
          total_loss: 1881180020.6222222
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 1881180020.6222222
    num_agent_steps_sampled: 1077000
    num_agent_steps_trained: 1077000
    num_steps_sampled: 1077000
    num_steps_trained: 1077000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1077,28651.9,1077000,-1.9957,-1.73,-2.04,199.57


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1078000
  custom_metrics: {}
  date: 2021-10-24_03-58-37
  done: false
  episode_len_mean: 199.48
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9948000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2394
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.707722766063004e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03131187831362089
          total_loss: 1880160308.6222222
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 1880160308.6222222
    num_agent_steps_sampled: 1078000
    num_agent_steps_trained: 1078000
    num_steps_sampled: 1078000
    num_steps_trained: 1078000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1078,28702.4,1078000,-1.9948,-1.73,-2.04,199.48


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1079000
  custom_metrics: {}
  date: 2021-10-24_03-59-25
  done: false
  episode_len_mean: 199.4
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9940000000000009
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2399
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.853861383031502e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03131158525745074
          total_loss: 1879162318.2222223
          vf_explained_var: 0.0
          vf_loss: 1879162318.2222223
    num_agent_steps_sampled: 1079000
    num_agent_steps_trained: 1079000
    num_steps_sampled: 1079000
    num_steps_trained: 1079000
  iterations_since_restore: 1079
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1079,28750.6,1079000,-1.994,-1.73,-2.04,199.4




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1080000
  custom_metrics: {}
  date: 2021-10-24_04-00-34
  done: false
  episode_len_mean: 199.04
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.990400000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2404
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.426930691515751e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.0010204965171093743
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.017175691492027707
          total_loss: .inf
          vf_explained_var: -0.3333333432674408
          vf_loss: 3369863351.4666667
    num_agent_steps_sampled: 1080000
    num_agent_steps_trained: 1080000
    num_steps_sampled: 1080000
    num_steps_trained: 1080000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1080,28819.7,1080000,-1.9904,-1.73,-2.03,199.04


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1081000
  custom_metrics: {}
  date: 2021-10-24_04-01-25
  done: false
  episode_len_mean: 198.96
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.989600000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2409
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.140396037273627e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.028989103933175404
          total_loss: 2218960665.6
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 2218960665.6
    num_agent_steps_sampled: 1081000
    num_agent_steps_trained: 1081000
    num_steps_sampled: 1081000
    num_steps_trained: 1081000
  iterations_since_restore: 1081
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1081,28870.2,1081000,-1.9896,-1.73,-2,198.96


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1082000
  custom_metrics: {}
  date: 2021-10-24_04-02-14
  done: false
  episode_len_mean: 199.19
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.991900000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2414
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0701980186368135e-26
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.02898900955915451
          total_loss: 1786555208.5333333
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 1786555208.5333333
    num_agent_steps_sampled: 1082000
    num_agent_steps_trained: 1082000
    num_steps_sampled: 1082000
    num_steps_trained: 1082000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1082,28919.7,1082000,-1.9919,-1.73,-2,199.19


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1083000
  custom_metrics: {}
  date: 2021-10-24_04-03-09
  done: false
  episode_len_mean: 199.19
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.991900000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2419
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.350990093184067e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.028989069163799286
          total_loss: 1785147989.3333333
          vf_explained_var: 9.934107225717526e-08
          vf_loss: 1785147989.3333333
    num_agent_steps_sampled: 1083000
    num_agent_steps_trained: 1083000
    num_steps_sampled: 1083000
    num_steps_trained: 1083000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1083,28974.5,1083000,-1.9919,-1.73,-2,199.19


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1084000
  custom_metrics: {}
  date: 2021-10-24_04-03-58
  done: false
  episode_len_mean: 199.19
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.991900000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2424
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.6754950465920336e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.028989093999067943
          total_loss: 1783988792.8888888
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 1783988792.8888888
    num_agent_steps_sampled: 1084000
    num_agent_steps_trained: 1084000
    num_steps_sampled: 1084000
    num_steps_trained: 1084000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1084,29023.5,1084000,-1.9919,-1.73,-2,199.19


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1085000
  custom_metrics: {}
  date: 2021-10-24_04-04-46
  done: false
  episode_len_mean: 199.19
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.991900000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2429
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3377475232960168e-27
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.028989029427369434
          total_loss: 1782785452.088889
          vf_explained_var: -5.960464477539063e-08
          vf_loss: 1782785452.088889
    num_agent_steps_sampled: 1085000
    num_agent_steps_trained: 1085000
    num_steps_sampled: 1085000
    num_steps_trained: 1085000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1085,29071.5,1085000,-1.9919,-1.73,-2,199.19




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1086000
  custom_metrics: {}
  date: 2021-10-24_04-05-53
  done: false
  episode_len_mean: 198.93
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9893000000000007
  episode_reward_min: -2.010000000000001
  episodes_this_iter: 5
  episodes_total: 2434
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.688737616480084e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.005592482909560203
          entropy_coeff: 0.009999999999999998
          kl: 0.0029451420530676843
          policy_loss: -0.1039120270146264
          total_loss: 3638354486141079.0
          vf_explained_var: 0.012870668433606625
          vf_loss: 3638354486141079.0
    num_agent_steps_sampled: 1086000
    num_agent_steps_trained: 1086000
    num_steps_sampled: 1086000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1086,29137.9,1086000,-1.9893,-1.73,-2.01,198.93


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1087000
  custom_metrics: {}
  date: 2021-10-24_04-06-41
  done: false
  episode_len_mean: 198.97
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9897000000000005
  episode_reward_min: -2.010000000000001
  episodes_this_iter: 5
  episodes_total: 2439
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.344368808240042e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.05570921980672412
          entropy_coeff: 0.009999999999999998
          kl: 0.002085147879360832
          policy_loss: -0.03476198762655258
          total_loss: 1800451036877073.0
          vf_explained_var: 0.3886706829071045
          vf_loss: 1800451036877073.0
    num_agent_steps_sampled: 1087000
    num_agent_steps_trained: 1087000
    num_steps_sampled: 1087000
    num_steps_trained: 1087

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1087,29186.2,1087000,-1.9897,-1.73,-2.01,198.97




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1088000
  custom_metrics: {}
  date: 2021-10-24_04-07-35
  done: false
  episode_len_mean: 199.29
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.992900000000001
  episode_reward_min: -2.020000000000001
  episodes_this_iter: 5
  episodes_total: 2444
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.672184404120021e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.03237857454352909
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.02810523518257671
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 3848602147589.689
    num_agent_steps_sampled: 1088000
    num_agent_steps_trained: 1088000
    num_steps_sampled: 1088000
    num_steps_trained: 1088000
  iterations_since_restore: 1088
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1088,29240.6,1088000,-1.9929,-1.73,-2.02,199.29


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1089000
  custom_metrics: {}
  date: 2021-10-24_04-08-26
  done: false
  episode_len_mean: 199.29
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.992900000000001
  episode_reward_min: -2.020000000000001
  episodes_this_iter: 5
  episodes_total: 2449
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.508276606180031e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.025770860413710277
          total_loss: 1886689229391.6445
          vf_explained_var: -3.311369178859991e-09
          vf_loss: 1886689229391.6445
    num_agent_steps_sampled: 1089000
    num_agent_steps_trained: 1089000
    num_steps_sampled: 1089000
    num_steps_trained: 1089000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1089,29291.5,1089000,-1.9929,-1.73,-2.02,199.29


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1090000
  custom_metrics: {}
  date: 2021-10-24_04-09-15
  done: false
  episode_len_mean: 199.29
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.992900000000001
  episode_reward_min: -2.020000000000001
  episodes_this_iter: 5
  episodes_total: 2454
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2541383030900154e-28
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.0257708082596461
          total_loss: 2401378798.9333334
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 2401378798.9333334
    num_agent_steps_sampled: 1090000
    num_agent_steps_trained: 1090000
    num_steps_sampled: 1090000
    num_steps_trained: 1090000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1090,29339.9,1090000,-1.9929,-1.73,-2.02,199.29




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1091000
  custom_metrics: {}
  date: 2021-10-24_04-10-18
  done: false
  episode_len_mean: 199.02
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9902000000000013
  episode_reward_min: -2.020000000000001
  episodes_this_iter: 6
  episodes_total: 2460
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.270691515450077e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.05967071900765101
          total_loss: 2301182092.8
          vf_explained_var: 1.1920928955078125e-07
          vf_loss: 2301182092.8
    num_agent_steps_sampled: 1091000
    num_agent_steps_trained: 1091000
    num_steps_sampled: 1091000
    num_steps_trained: 1091000
  iterations_since_restore: 1091
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1091,29403.7,1091000,-1.9902,-1.73,-2.02,199.02


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1092000
  custom_metrics: {}
  date: 2021-10-24_04-11-10
  done: false
  episode_len_mean: 199.05
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.990500000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2465
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.1353457577250386e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.02254050349195798
          entropy_coeff: 0.009999999999999998
          kl: 0.00023657520489198052
          policy_loss: 0.13074557847446866
          total_loss: 725210032603.0222
          vf_explained_var: -0.3333333432674408
          vf_loss: 725210032603.0222
    num_agent_steps_sampled: 1092000
    num_agent_steps_trained: 1092000
    num_steps_sampled: 1092000
    num_steps_trained: 109

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1092,29455.1,1092000,-1.9905,-1.73,-2.03,199.05


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1093000
  custom_metrics: {}
  date: 2021-10-24_04-12-00
  done: false
  episode_len_mean: 199.32
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993200000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2470
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5676728788625193e-29
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.05408429602781931
          total_loss: 1772829897159.111
          vf_explained_var: 1.5894572769070692e-08
          vf_loss: 1772829897159.111
    num_agent_steps_sampled: 1093000
    num_agent_steps_trained: 1093000
    num_steps_sampled: 1093000
    num_steps_trained: 1093000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1093,29504.7,1093000,-1.9932,-1.73,-2.03,199.32


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1094000
  custom_metrics: {}
  date: 2021-10-24_04-12-50
  done: false
  episode_len_mean: 199.32
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993200000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2475
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.838364394312596e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.05408445745706558
          total_loss: 2685726776.888889
          vf_explained_var: 5.960464477539063e-08
          vf_loss: 2685726776.888889
    num_agent_steps_sampled: 1094000
    num_agent_steps_trained: 1094000
    num_steps_sampled: 1094000
    num_steps_trained: 1094000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1094,29555.5,1094000,-1.9932,-1.73,-2.03,199.32


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1095000
  custom_metrics: {}
  date: 2021-10-24_04-13-40
  done: false
  episode_len_mean: 199.32
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993200000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2480
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.919182197156298e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.054084510852893196
          total_loss: 2568793173.3333335
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 2568793173.3333335
    num_agent_steps_sampled: 1095000
    num_agent_steps_trained: 1095000
    num_steps_sampled: 1095000
    num_steps_trained: 1095000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1095,29605.6,1095000,-1.9932,-1.73,-2.03,199.32


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1096000
  custom_metrics: {}
  date: 2021-10-24_04-14-29
  done: false
  episode_len_mean: 199.32
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993200000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2485
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.959591098578149e-30
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.05408440281947454
          total_loss: 2567606673.0666666
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 2567606673.0666666
    num_agent_steps_sampled: 1096000
    num_agent_steps_trained: 1096000
    num_steps_sampled: 1096000
    num_steps_trained: 1096000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1096,29654.6,1096000,-1.9932,-1.73,-2.03,199.32




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1097000
  custom_metrics: {}
  date: 2021-10-24_04-15-36
  done: false
  episode_len_mean: 199.05
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.990500000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2490
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.797955492890746e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.04569548865159353
          total_loss: 2567074898.4888887
          vf_explained_var: 3.046459795541523e-08
          vf_loss: 2567074898.4888887
    num_agent_steps_sampled: 1097000
    num_agent_steps_trained: 1097000
    num_steps_sampled: 1097000
    num_steps_trained: 1097000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1097,29721.4,1097000,-1.9905,-1.73,-2.03,199.05


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1098000
  custom_metrics: {}
  date: 2021-10-24_04-16-27
  done: false
  episode_len_mean: 199.05
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.990500000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2495
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.898977746445373e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03237000356117884
          total_loss: 2940103136.711111
          vf_explained_var: 4.503462136540293e-08
          vf_loss: 2940103136.711111
    num_agent_steps_sampled: 1098000
    num_agent_steps_trained: 1098000
    num_steps_sampled: 1098000
    num_steps_trained: 1098000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1098,29772.2,1098000,-1.9905,-1.73,-2.03,199.05


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1099000
  custom_metrics: {}
  date: 2021-10-24_04-17-17
  done: false
  episode_len_mean: 199.32
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993200000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2500
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4494888732226864e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03236989428599676
          total_loss: 2939427615.288889
          vf_explained_var: 0.0
          vf_loss: 2939427615.288889
    num_agent_steps_sampled: 1099000
    num_agent_steps_trained: 1099000
    num_steps_sampled: 1099000
    num_steps_trained: 1099000
  iterations_since_restore: 1099
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1099,29822.5,1099000,-1.9932,-1.73,-2.03,199.32


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1100000
  custom_metrics: {}
  date: 2021-10-24_04-18-07
  done: false
  episode_len_mean: 199.32
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993200000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2505
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2247444366113432e-31
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03236997872591019
          total_loss: 2938681776.3555555
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 2938681776.3555555
    num_agent_steps_sampled: 1100000
    num_agent_steps_trained: 1100000
    num_steps_sampled: 1100000
    num_steps_trained: 1100000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1100,29871.7,1100000,-1.9932,-1.73,-2.03,199.32


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1101000
  custom_metrics: {}
  date: 2021-10-24_04-18-55
  done: false
  episode_len_mean: 199.32
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993200000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2510
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.123722183056716e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03236993153889974
          total_loss: 2937925705.9555554
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 2937925705.9555554
    num_agent_steps_sampled: 1101000
    num_agent_steps_trained: 1101000
    num_steps_sampled: 1101000
    num_steps_trained: 1101000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1101,29920.5,1101000,-1.9932,-1.73,-2.03,199.32


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1102000
  custom_metrics: {}
  date: 2021-10-24_04-19-45
  done: false
  episode_len_mean: 199.32
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993200000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2515
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.061861091528358e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.032370040814081825
          total_loss: 2937145403.733333
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 2937145403.733333
    num_agent_steps_sampled: 1102000
    num_agent_steps_trained: 1102000
    num_steps_sampled: 1102000
    num_steps_trained: 1102000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1102,29969.7,1102000,-1.9932,-1.73,-2.03,199.32




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1103000
  custom_metrics: {}
  date: 2021-10-24_04-20-50
  done: false
  episode_len_mean: 199.04
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.990400000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2520
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.530930545764179e-32
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.020045955975850422
          total_loss: 2936337388.088889
          vf_explained_var: -5.960464477539063e-08
          vf_loss: 2936337388.088889
    num_agent_steps_sampled: 1103000
    num_agent_steps_trained: 1103000
    num_steps_sampled: 1103000
    num_steps_trained: 1103000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1103,30034.6,1103000,-1.9904,-1.72,-2.03,199.04


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1104000
  custom_metrics: {}
  date: 2021-10-24_04-21-41
  done: false
  episode_len_mean: 199.04
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9904000000000008
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2525
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.654652728820895e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03202394644419352
          total_loss: 2922313784.888889
          vf_explained_var: -6.225373994084293e-08
          vf_loss: 2922313784.888889
    num_agent_steps_sampled: 1104000
    num_agent_steps_trained: 1104000
    num_steps_sampled: 1104000
    num_steps_trained: 1104000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1104,30086.1,1104000,-1.9904,-1.72,-2.03,199.04


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1105000
  custom_metrics: {}
  date: 2021-10-24_04-22-30
  done: false
  episode_len_mean: 199.31
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9931000000000012
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2530
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.8273263644104475e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03202385952075323
          total_loss: 2921401910.0444446
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 2921401910.0444446
    num_agent_steps_sampled: 1105000
    num_agent_steps_trained: 1105000
    num_steps_sampled: 1105000
    num_steps_trained: 1105000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1105,30135.3,1105000,-1.9931,-1.72,-2.03,199.31


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1106000
  custom_metrics: {}
  date: 2021-10-24_04-23-18
  done: false
  episode_len_mean: 199.3
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.993000000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2535
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9136631822052237e-33
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.032023943960666656
          total_loss: 2920475724.8
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 2920475724.8
    num_agent_steps_sampled: 1106000
    num_agent_steps_trained: 1106000
    num_steps_sampled: 1106000
    num_steps_trained: 1106000
  iterations_since_restore: 1106
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1106,30182.9,1106000,-1.993,-1.72,-2.03,199.3


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1107000
  custom_metrics: {}
  date: 2021-10-24_04-24-08
  done: false
  episode_len_mean: 199.25
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.992500000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2540
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.568315911026119e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03202389429012934
          total_loss: 2919515807.288889
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 2919515807.288889
    num_agent_steps_sampled: 1107000
    num_agent_steps_trained: 1107000
    num_steps_sampled: 1107000
    num_steps_trained: 1107000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1107,30233.3,1107000,-1.9925,-1.72,-2.03,199.25


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1108000
  custom_metrics: {}
  date: 2021-10-24_04-24-56
  done: false
  episode_len_mean: 199.21
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9921000000000009
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2545
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.784157955513059e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03202391415834427
          total_loss: 2918517424.3555555
          vf_explained_var: -3.1789145538141383e-08
          vf_loss: 2918517424.3555555
    num_agent_steps_sampled: 1108000
    num_agent_steps_trained: 1108000
    num_steps_sampled: 1108000
    num_steps_trained: 1108000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1108,30280.8,1108000,-1.9921,-1.72,-2.03,199.21




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1109000
  custom_metrics: {}
  date: 2021-10-24_04-26-03
  done: false
  episode_len_mean: 198.94
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9894000000000007
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2550
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3920789777565297e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.10347118104497592
          total_loss: 2917555677.866667
          vf_explained_var: 5.960464477539063e-08
          vf_loss: 2917555677.866667
    num_agent_steps_sampled: 1109000
    num_agent_steps_trained: 1109000
    num_steps_sampled: 1109000
    num_steps_trained: 1109000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1109,30347.6,1109000,-1.9894,-1.72,-2.03,198.94


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1110000
  custom_metrics: {}
  date: 2021-10-24_04-26-54
  done: false
  episode_len_mean: 198.95
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.989500000000001
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2555
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1960394888782648e-34
          cur_lr: 5.000000000000001e-05
          entropy: 0.01045496269232697
          entropy_coeff: 0.009999999999999998
          kl: 3.2697612078407763e-08
          policy_loss: 0.026342624094751146
          total_loss: 149614442717.86667
          vf_explained_var: -0.3333333432674408
          vf_loss: 149614442717.86667
    num_agent_steps_sampled: 1110000
    num_agent_steps_trained: 1110000
    num_steps_sampled: 1110000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1110,30398.7,1110000,-1.9895,-1.72,-2.03,198.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1111000
  custom_metrics: {}
  date: 2021-10-24_04-27-43
  done: false
  episode_len_mean: 199.22
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9922000000000009
  episode_reward_min: -2.0300000000000007
  episodes_this_iter: 5
  episodes_total: 2560
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.980197444391324e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03194226324558258
          total_loss: 156270506439.1111
          vf_explained_var: 1.7219120351796846e-08
          vf_loss: 156270506439.1111
    num_agent_steps_sampled: 1111000
    num_agent_steps_trained: 1111000
    num_steps_sampled: 1111000
    num_steps_trained: 1111000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1111,30447.4,1111000,-1.9922,-1.72,-2.03,199.22


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1112000
  custom_metrics: {}
  date: 2021-10-24_04-28-31
  done: false
  episode_len_mean: 199.19
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9919000000000011
  episode_reward_min: -2.010000000000001
  episodes_this_iter: 5
  episodes_total: 2565
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.990098722195662e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031942365070184074
          total_loss: 2935777280.0
          vf_explained_var: 7.020102543719986e-08
          vf_loss: 2935777280.0
    num_agent_steps_sampled: 1112000
    num_agent_steps_trained: 1112000
    num_steps_sampled: 1112000
    num_steps_trained: 1112000
  iterations_since_restore: 1112
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1112,30495.2,1112000,-1.9919,-1.72,-2.01,199.19


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1113000
  custom_metrics: {}
  date: 2021-10-24_04-29-19
  done: false
  episode_len_mean: 199.19
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9919000000000011
  episode_reward_min: -2.010000000000001
  episodes_this_iter: 5
  episodes_total: 2570
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.495049361097831e-35
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031942387421925865
          total_loss: 2931769787.733333
          vf_explained_var: 5.828009719266447e-08
          vf_loss: 2931769787.733333
    num_agent_steps_sampled: 1113000
    num_agent_steps_trained: 1113000
    num_steps_sampled: 1113000
    num_steps_trained: 1113000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1113,30543.5,1113000,-1.9919,-1.72,-2.01,199.19


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1114000
  custom_metrics: {}
  date: 2021-10-24_04-30-09
  done: false
  episode_len_mean: 199.19
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.991900000000001
  episode_reward_min: -2.010000000000001
  episodes_this_iter: 5
  episodes_total: 2575
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.475246805489155e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03194231788317362
          total_loss: 2930319104.0
          vf_explained_var: 1.7219120351796846e-08
          vf_loss: 2930319104.0
    num_agent_steps_sampled: 1114000
    num_agent_steps_trained: 1114000
    num_steps_sampled: 1114000
    num_steps_trained: 1114000
  iterations_since_restore: 1114
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1114,30593.5,1114000,-1.9919,-1.72,-2.01,199.19




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1115000
  custom_metrics: {}
  date: 2021-10-24_04-31-14
  done: false
  episode_len_mean: 198.92
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9892000000000007
  episode_reward_min: -2.010000000000001
  episodes_this_iter: 5
  episodes_total: 2580
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7376234027445776e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.0048752762377262115
          total_loss: 2928893929.2444444
          vf_explained_var: 9.934107225717526e-08
          vf_loss: 2928893929.2444444
    num_agent_steps_sampled: 1115000
    num_agent_steps_trained: 1115000
    num_steps_sampled: 1115000
    num_steps_trained: 1115000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1115,30659.2,1115000,-1.9892,-1.72,-2.01,198.92




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1116000
  custom_metrics: {}
  date: 2021-10-24_04-32-05
  done: false
  episode_len_mean: 199.0
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9900000000000009
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2585
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8688117013722888e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.03484742835991912
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.07616126553879844
          total_loss: .inf
          vf_explained_var: -1.0
          vf_loss: 321398064014.2222
    num_agent_steps_sampled: 1116000
    num_agent_steps_trained: 1116000
    num_steps_sampled: 1116000
    num_steps_trained: 1116000
  iterations_since_restore: 1116
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1116,30709.5,1116000,-1.99,-1.72,-2.04,199


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1117000
  custom_metrics: {}
  date: 2021-10-24_04-32-56
  done: false
  episode_len_mean: 199.28
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9928000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2590
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.803217552058433e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.016171918850806023
          entropy_coeff: 0.009999999999999998
          kl: 0.00233458715180556
          policy_loss: -0.07962183819876777
          total_loss: 13011070806880.71
          vf_explained_var: 0.2383406162261963
          vf_loss: 13011070806880.71
    num_agent_steps_sampled: 1117000
    num_agent_steps_trained: 1117000
    num_steps_sampled: 1117000
    num_steps_trained: 11170

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1117,30760.3,1117000,-1.9928,-1.72,-2.04,199.28




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1118000
  custom_metrics: {}
  date: 2021-10-24_04-33-46
  done: false
  episode_len_mean: 199.38
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.993800000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2595
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4016087760292165e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.03488181082324849
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.06240435325437122
          total_loss: .inf
          vf_explained_var: -0.6679790019989014
          vf_loss: 108169527849870.22
    num_agent_steps_sampled: 1118000
    num_agent_steps_trained: 1118000
    num_steps_sampled: 1118000
    num_steps_trained: 1118000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1118,30810.6,1118000,-1.9938,-1.72,-2.05,199.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1119000
  custom_metrics: {}
  date: 2021-10-24_04-34-37
  done: false
  episode_len_mean: 199.38
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9938000000000011
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2600
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.102413164043825e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03191828727722168
          total_loss: 378431406080.0
          vf_explained_var: 1.3245476715439963e-08
          vf_loss: 378431406080.0
    num_agent_steps_sampled: 1119000
    num_agent_steps_trained: 1119000
    num_steps_sampled: 1119000
    num_steps_trained: 1119000
  iterations_since_restore: 1119

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1119,30861.2,1119000,-1.9938,-1.72,-2.05,199.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1120000
  custom_metrics: {}
  date: 2021-10-24_04-35-29
  done: false
  episode_len_mean: 199.38
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9938000000000011
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2605
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0512065820219124e-36
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03191815813382467
          total_loss: 3023987370.6666665
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 3023987370.6666665
    num_agent_steps_sampled: 1120000
    num_agent_steps_trained: 1120000
    num_steps_sampled: 1120000
    num_steps_trained: 1120000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1120,30913.2,1120000,-1.9938,-1.72,-2.05,199.38




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1121000
  custom_metrics: {}
  date: 2021-10-24_04-36-34
  done: false
  episode_len_mean: 199.11
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.991100000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2610
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.256032910109562e-37
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.029686326781908672
          total_loss: 3010390963.2
          vf_explained_var: 6.556511067401516e-08
          vf_loss: 3010390963.2
    num_agent_steps_sampled: 1121000
    num_agent_steps_trained: 1121000
    num_steps_sampled: 1121000
    num_steps_trained: 1121000
  iterations_since_restore: 1121
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1121,30978.5,1121000,-1.9911,-1.72,-2.05,199.11


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1122000
  custom_metrics: {}
  date: 2021-10-24_04-37-26
  done: false
  episode_len_mean: 199.12
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9912000000000012
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2615
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.628016455054781e-37
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.0317262498040994
          total_loss: 2999061313.422222
          vf_explained_var: 3.7087335869046e-08
          vf_loss: 2999061313.422222
    num_agent_steps_sampled: 1122000
    num_agent_steps_trained: 1122000
    num_steps_sampled: 1122000
    num_steps_trained: 1122000
  iterations_since_restore: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1122,31030.3,1122000,-1.9912,-1.72,-2.05,199.12


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1123000
  custom_metrics: {}
  date: 2021-10-24_04-38-14
  done: false
  episode_len_mean: 199.4
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9940000000000009
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2620
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3140082275273905e-37
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03172623490293821
          total_loss: 2996555699.2
          vf_explained_var: 2.9802322387695312e-08
          vf_loss: 2996555699.2
    num_agent_steps_sampled: 1123000
    num_agent_steps_trained: 1123000
    num_steps_sampled: 1123000
    num_steps_trained: 1123000
  iterations_since_restore: 1123
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1123,31078.9,1123000,-1.994,-1.73,-2.05,199.4


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1124000
  custom_metrics: {}
  date: 2021-10-24_04-39-04
  done: false
  episode_len_mean: 199.4
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9940000000000009
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2625
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.570041137636953e-38
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031726171573003135
          total_loss: 2993926946.133333
          vf_explained_var: -7.947286384535346e-09
          vf_loss: 2993926946.133333
    num_agent_steps_sampled: 1124000
    num_agent_steps_trained: 1124000
    num_steps_sampled: 1124000
    num_steps_trained: 1124000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1124,31128.4,1124000,-1.994,-1.73,-2.05,199.4


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1125000
  custom_metrics: {}
  date: 2021-10-24_04-39-53
  done: false
  episode_len_mean: 199.4
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9940000000000009
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2630
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2850205688184764e-38
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03172619889179865
          total_loss: 2991196307.911111
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 2991196307.911111
    num_agent_steps_sampled: 1125000
    num_agent_steps_trained: 1125000
    num_steps_sampled: 1125000
    num_steps_trained: 1125000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1125,31177.2,1125000,-1.994,-1.73,-2.05,199.4


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1126000
  custom_metrics: {}
  date: 2021-10-24_04-40-42
  done: false
  episode_len_mean: 199.4
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9940000000000009
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2635
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6425102844092382e-38
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031726185232400894
          total_loss: 2988314703.6444445
          vf_explained_var: -2.6490953874969136e-09
          vf_loss: 2988314703.6444445
    num_agent_steps_sampled: 1126000
    num_agent_steps_trained: 1126000
    num_steps_sampled: 1126000
    num_steps_trained: 1126000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1126,31226.6,1126000,-1.994,-1.73,-2.05,199.4




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1127000
  custom_metrics: {}
  date: 2021-10-24_04-41-49
  done: false
  episode_len_mean: 199.13
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9913000000000007
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2640
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.212551422046191e-39
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.3364237646261851
          total_loss: 2985313945.6
          vf_explained_var: 7.947286206899662e-08
          vf_loss: 2985313945.6
    num_agent_steps_sampled: 1127000
    num_agent_steps_trained: 1127000
    num_steps_sampled: 1127000
    num_steps_trained: 1127000
  iterations_since_restore: 1127
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1127,31292.9,1127000,-1.9913,-1.73,-2.05,199.13


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1128000
  custom_metrics: {}
  date: 2021-10-24_04-42-40
  done: false
  episode_len_mean: 199.13
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9913000000000012
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2645
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.1062757110230954e-39
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.030796319246292114
          total_loss: 238559522187582.56
          vf_explained_var: -6.821420583946747e-08
          vf_loss: 238559522187582.56
    num_agent_steps_sampled: 1128000
    num_agent_steps_trained: 1128000
    num_steps_sampled: 1128000
    num_steps_trained: 1128000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1128,31344.4,1128000,-1.9913,-1.73,-2.05,199.13


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1129000
  custom_metrics: {}
  date: 2021-10-24_04-43-29
  done: false
  episode_len_mean: 199.4
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9940000000000009
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2650
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0531378555115477e-39
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03079611559708913
          total_loss: 3186632960.0
          vf_explained_var: 8.609560175898423e-09
          vf_loss: 3186632960.0
    num_agent_steps_sampled: 1129000
    num_agent_steps_trained: 1129000
    num_steps_sampled: 1129000
    num_steps_trained: 1129000
  iterations_since_restore: 1129
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1129,31392.9,1129000,-1.994,-1.73,-2.05,199.4


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1130000
  custom_metrics: {}
  date: 2021-10-24_04-44-19
  done: false
  episode_len_mean: 199.39
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993900000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2655
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0265689277557739e-39
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03079620748758316
          total_loss: 3172208389.688889
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 3172208389.688889
    num_agent_steps_sampled: 1130000
    num_agent_steps_trained: 1130000
    num_steps_sampled: 1130000
    num_steps_trained: 1130000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1130,31442.9,1130000,-1.9939,-1.73,-2.05,199.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1131000
  custom_metrics: {}
  date: 2021-10-24_04-45-07
  done: false
  episode_len_mean: 199.39
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993900000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2660
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.132844638778869e-40
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03079618513584137
          total_loss: 3171370379.3777776
          vf_explained_var: -5.960464477539063e-08
          vf_loss: 3171370379.3777776
    num_agent_steps_sampled: 1131000
    num_agent_steps_trained: 1131000
    num_steps_sampled: 1131000
    num_steps_trained: 1131000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1131,31491.3,1131000,-1.9939,-1.73,-2.05,199.39


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1132000
  custom_metrics: {}
  date: 2021-10-24_04-45-55
  done: false
  episode_len_mean: 199.39
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993900000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2665
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5664223193894347e-40
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.030796095728874207
          total_loss: 3170625820.4444447
          vf_explained_var: 1.4503797274301178e-07
          vf_loss: 3170625820.4444447
    num_agent_steps_sampled: 1132000
    num_agent_steps_trained: 1132000
    num_steps_sampled: 1132000
    num_steps_trained: 1132000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1132,31539.5,1132000,-1.9939,-1.73,-2.05,199.39




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1133000
  custom_metrics: {}
  date: 2021-10-24_04-47-05
  done: false
  episode_len_mean: 199.11
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.991100000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2670
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2832111596947173e-40
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.013336422542730967
          total_loss: 3169762966.7555556
          vf_explained_var: -3.1789145538141383e-08
          vf_loss: 3169762966.7555556
    num_agent_steps_sampled: 1133000
    num_agent_steps_trained: 1133000
    num_steps_sampled: 1133000
    num_steps_trained: 1133000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1133,31608.8,1133000,-1.9911,-1.72,-2.05,199.11


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1134000
  custom_metrics: {}
  date: 2021-10-24_04-47-54
  done: false
  episode_len_mean: 199.11
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.991100000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2675
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.416055798473587e-41
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.02696372816960017
          total_loss: 2803317535.288889
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 2803317535.288889
    num_agent_steps_sampled: 1134000
    num_agent_steps_trained: 1134000
    num_steps_sampled: 1134000
    num_steps_trained: 1134000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1134,31658.1,1134000,-1.9911,-1.72,-2.05,199.11


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1135000
  custom_metrics: {}
  date: 2021-10-24_04-48-43
  done: false
  episode_len_mean: 199.38
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.993800000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2680
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2080278992367933e-41
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.026963735620180767
          total_loss: 2802561635.5555553
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 2802561635.5555553
    num_agent_steps_sampled: 1135000
    num_agent_steps_trained: 1135000
    num_steps_sampled: 1135000
    num_steps_trained: 1135000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1135,31707,1135000,-1.9938,-1.72,-2.05,199.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1136000
  custom_metrics: {}
  date: 2021-10-24_04-49-33
  done: false
  episode_len_mean: 199.3
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.993000000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2685
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6040139496183967e-41
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.02696369340022405
          total_loss: 2801775621.688889
          vf_explained_var: 1.9868215961338365e-09
          vf_loss: 2801775621.688889
    num_agent_steps_sampled: 1136000
    num_agent_steps_trained: 1136000
    num_steps_sampled: 1136000
    num_steps_trained: 1136000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1136,31756.8,1136000,-1.993,-1.72,-2.05,199.3


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1137000
  custom_metrics: {}
  date: 2021-10-24_04-50-21
  done: false
  episode_len_mean: 199.29
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9929000000000008
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2690
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.020069748091983e-42
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.02696382502714793
          total_loss: 2800938461.866667
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 2800938461.866667
    num_agent_steps_sampled: 1137000
    num_agent_steps_trained: 1137000
    num_steps_sampled: 1137000
    num_steps_trained: 1137000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1137,31804.8,1137000,-1.9929,-1.72,-2.05,199.29


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1138000
  custom_metrics: {}
  date: 2021-10-24_04-51-10
  done: false
  episode_len_mean: 199.19
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9919000000000011
  episode_reward_min: -2.010000000000001
  episodes_this_iter: 5
  episodes_total: 2695
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.0100348740459916e-42
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.026963695883750916
          total_loss: 2800102533.688889
          vf_explained_var: -9.934107758624577e-09
          vf_loss: 2800102533.688889
    num_agent_steps_sampled: 1138000
    num_agent_steps_trained: 1138000
    num_steps_sampled: 1138000
    num_steps_trained: 1138000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1138,31854.2,1138000,-1.9919,-1.72,-2.01,199.19




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1139000
  custom_metrics: {}
  date: 2021-10-24_04-52-18
  done: false
  episode_len_mean: 198.91
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9891000000000008
  episode_reward_min: -2.010000000000001
  episodes_this_iter: 6
  episodes_total: 2701
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0050174370229958e-42
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.03684110504885515
          total_loss: 2883148902.4
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 2883148902.4
    num_agent_steps_sampled: 1139000
    num_agent_steps_trained: 1139000
    num_steps_sampled: 1139000
    num_steps_trained: 1139000
  iterations_since_restore: 1139
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1139,31921.8,1139000,-1.9891,-1.72,-2.01,198.91


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1140000
  custom_metrics: {}
  date: 2021-10-24_04-53-06
  done: false
  episode_len_mean: 198.91
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9891000000000008
  episode_reward_min: -2.010000000000001
  episodes_this_iter: 5
  episodes_total: 2706
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0025087185114979e-42
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.059019808967908226
          total_loss: 2821630853.688889
          vf_explained_var: 5.960464477539063e-08
          vf_loss: 2821630853.688889
    num_agent_steps_sampled: 1140000
    num_agent_steps_trained: 1140000
    num_steps_sampled: 1140000
    num_steps_trained: 1140000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1140,31970.5,1140000,-1.9891,-1.72,-2.01,198.91


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1141000
  custom_metrics: {}
  date: 2021-10-24_04-53-55
  done: false
  episode_len_mean: 199.17
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.991700000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2711
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.0125435925574896e-43
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.05901993811130524
          total_loss: 2820572003.5555553
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 2820572003.5555553
    num_agent_steps_sampled: 1141000
    num_agent_steps_trained: 1141000
    num_steps_sampled: 1141000
    num_steps_trained: 1141000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1141,32019.3,1141000,-1.9917,-1.72,-2,199.17


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1142000
  custom_metrics: {}
  date: 2021-10-24_04-54-45
  done: false
  episode_len_mean: 199.17
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.991700000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2716
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5062717962787448e-43
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.05901992196838061
          total_loss: 2819505012.6222224
          vf_explained_var: -6.755193027174755e-08
          vf_loss: 2819505012.6222224
    num_agent_steps_sampled: 1142000
    num_agent_steps_trained: 1142000
    num_steps_sampled: 1142000
    num_steps_trained: 1142000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1142,32068.5,1142000,-1.9917,-1.72,-2,199.17


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1143000
  custom_metrics: {}
  date: 2021-10-24_04-55-34
  done: false
  episode_len_mean: 199.17
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.991700000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2721
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2531358981393724e-43
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.05901991824309031
          total_loss: 2818380461.5111113
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 2818380461.5111113
    num_agent_steps_sampled: 1143000
    num_agent_steps_trained: 1143000
    num_steps_sampled: 1143000
    num_steps_trained: 1143000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1143,32117.7,1143000,-1.9917,-1.72,-2,199.17


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1144000
  custom_metrics: {}
  date: 2021-10-24_04-56-22
  done: false
  episode_len_mean: 199.17
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.991700000000001
  episode_reward_min: -2.0000000000000013
  episodes_this_iter: 5
  episodes_total: 2726
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.265679490696862e-44
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.05901992817719778
          total_loss: 2817202488.888889
          vf_explained_var: 2.1855036891338386e-08
          vf_loss: 2817202488.888889
    num_agent_steps_sampled: 1144000
    num_agent_steps_trained: 1144000
    num_steps_sampled: 1144000
    num_steps_trained: 1144000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1144,32165.9,1144000,-1.9917,-1.72,-2,199.17




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1145000
  custom_metrics: {}
  date: 2021-10-24_04-57-29
  done: false
  episode_len_mean: 198.95
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.989500000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2731
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.132839745348431e-44
          cur_lr: 5.000000000000001e-05
          entropy: 0.0003907305977514221
          entropy_coeff: 0.009999999999999998
          kl: 0.0027559837326407433
          policy_loss: 0.08305485546588898
          total_loss: 9692797118.577778
          vf_explained_var: -0.004852097015827894
          vf_loss: 9692797118.577778
    num_agent_steps_sampled: 1145000
    num_agent_steps_trained: 1145000
    num_steps_sampled: 1145000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1145,32232.7,1145000,-1.9895,-1.72,-2.05,198.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1146000
  custom_metrics: {}
  date: 2021-10-24_04-58-19
  done: false
  episode_len_mean: 198.95
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.989500000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2736
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5664198726742155e-44
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.025893546640872955
          total_loss: 5100691586.844444
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 5100691586.844444
    num_agent_steps_sampled: 1146000
    num_agent_steps_trained: 1146000
    num_steps_sampled: 1146000
    num_steps_trained: 1146000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1146,32283,1146000,-1.9895,-1.72,-2.05,198.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1147000
  custom_metrics: {}
  date: 2021-10-24_04-59-07
  done: false
  episode_len_mean: 199.22
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.992200000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2741
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.832099363371077e-45
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.025893449783325195
          total_loss: 3200527672.888889
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 3200527672.888889
    num_agent_steps_sampled: 1147000
    num_agent_steps_trained: 1147000
    num_steps_sampled: 1147000
    num_steps_trained: 1147000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1147,32330.6,1147000,-1.9922,-1.72,-2.05,199.22


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1148000
  custom_metrics: {}
  date: 2021-10-24_04-59-59
  done: false
  episode_len_mean: 199.22
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9922000000000009
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2746
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.916049681685539e-45
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.025893479585647583
          total_loss: 3198200462.2222223
          vf_explained_var: 0.0
          vf_loss: 3198200462.2222223
    num_agent_steps_sampled: 1148000
    num_agent_steps_trained: 1148000
    num_steps_sampled: 1148000
    num_steps_trained: 1148000
  iterations_since_restore: 1148
  node_ip

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1148,32383.1,1148000,-1.9922,-1.72,-2.05,199.22


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1149000
  custom_metrics: {}
  date: 2021-10-24_05-00-48
  done: false
  episode_len_mean: 199.22
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9922000000000009
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2751
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9580248408427694e-45
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.02589343984921773
          total_loss: 3196649793.422222
          vf_explained_var: -4.503462136540293e-08
          vf_loss: 3196649793.422222
    num_agent_steps_sampled: 1149000
    num_agent_steps_trained: 1149000
    num_steps_sampled: 1149000
    num_steps_trained: 1149000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1149,32432.2,1149000,-1.9922,-1.72,-2.05,199.22


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1150000
  custom_metrics: {}
  date: 2021-10-24_05-01-38
  done: false
  episode_len_mean: 199.22
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9922000000000009
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2756
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.790124204213847e-46
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.0258933628598849
          total_loss: 3194835646.577778
          vf_explained_var: -5.960464477539063e-08
          vf_loss: 3194835646.577778
    num_agent_steps_sampled: 1150000
    num_agent_steps_trained: 1150000
    num_steps_sampled: 1150000
    num_steps_trained: 1150000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1150,32482.1,1150000,-1.9922,-1.72,-2.05,199.22




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1151000
  custom_metrics: {}
  date: 2021-10-24_05-02-49
  done: false
  episode_len_mean: 198.95
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.989500000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2761
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.8950621021069234e-46
          cur_lr: 5.000000000000001e-05
          entropy: 0.0002968766577800529
          entropy_coeff: 0.009999999999999998
          kl: 8.848499335714323e-09
          policy_loss: -0.032480170163843365
          total_loss: 5656638557.866667
          vf_explained_var: -0.3333333432674408
          vf_loss: 5656638557.866667
    num_agent_steps_sampled: 1151000
    num_agent_steps_trained: 1151000
    num_steps_sampled: 1151000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1151,32553.2,1151000,-1.9895,-1.72,-2.05,198.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1152000
  custom_metrics: {}
  date: 2021-10-24_05-03-39
  done: false
  episode_len_mean: 198.95
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.989500000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2766
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4475310510534617e-46
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.032072817285855613
          total_loss: 3647046496.711111
          vf_explained_var: 0.0
          vf_loss: 3647046496.711111
    num_agent_steps_sampled: 1152000
    num_agent_steps_trained: 1152000
    num_steps_sampled: 1152000
    num_steps_trained: 1152000
  iterations_since_restore: 1152
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1152,32603.2,1152000,-1.9895,-1.72,-2.05,198.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1153000
  custom_metrics: {}
  date: 2021-10-24_05-04-27
  done: false
  episode_len_mean: 199.23
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.992300000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2771
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2237655255267309e-46
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.032072807351748146
          total_loss: 3223491541.3333335
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 3223491541.3333335
    num_agent_steps_sampled: 1153000
    num_agent_steps_trained: 1153000
    num_steps_sampled: 1153000
    num_steps_trained: 1153000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1153,32651.2,1153000,-1.9923,-1.72,-2.05,199.23


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1154000
  custom_metrics: {}
  date: 2021-10-24_05-05-19
  done: false
  episode_len_mean: 199.23
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.992300000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2776
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.118827627633654e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.0320729116598765
          total_loss: 3220455153.7777777
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 3220455153.7777777
    num_agent_steps_sampled: 1154000
    num_agent_steps_trained: 1154000
    num_steps_sampled: 1154000
    num_steps_trained: 1154000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1154,32702.5,1154000,-1.9923,-1.72,-2.05,199.23


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1155000
  custom_metrics: {}
  date: 2021-10-24_05-06-07
  done: false
  episode_len_mean: 199.23
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.992300000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2781
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.059413813816827e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03207282225290934
          total_loss: 3218117048.888889
          vf_explained_var: -5.6955549609938316e-08
          vf_loss: 3218117048.888889
    num_agent_steps_sampled: 1155000
    num_agent_steps_trained: 1155000
    num_steps_sampled: 1155000
    num_steps_trained: 1155000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1155,32750.7,1155000,-1.9923,-1.72,-2.05,199.23


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1156000
  custom_metrics: {}
  date: 2021-10-24_05-06-59
  done: false
  episode_len_mean: 199.23
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9923000000000008
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2786
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5297069069084136e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03207286943991979
          total_loss: 3215739403.3777776
          vf_explained_var: 1.1126200405442432e-07
          vf_loss: 3215739403.3777776
    num_agent_steps_sampled: 1156000
    num_agent_steps_trained: 1156000
    num_steps_sampled: 1156000
    num_steps_trained: 1156000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1156,32802.8,1156000,-1.9923,-1.72,-2.05,199.23




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1157000
  custom_metrics: {}
  date: 2021-10-24_05-08-06
  done: false
  episode_len_mean: 198.96
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.989600000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2791
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.648534534542068e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.0006088608591299918
          entropy_coeff: 0.009999999999999998
          kl: 5.961255406469314e-05
          policy_loss: -0.03339289426803589
          total_loss: 5812675393.422222
          vf_explained_var: -0.3333333432674408
          vf_loss: 5812675393.422222
    num_agent_steps_sampled: 1157000
    num_agent_steps_trained: 1157000
    num_steps_sampled: 1157000
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1157,32870.1,1157000,-1.9896,-1.72,-2.05,198.96


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1158000
  custom_metrics: {}
  date: 2021-10-24_05-08-54
  done: false
  episode_len_mean: 198.96
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.989600000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2796
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.824267267271034e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031960448871056236
          total_loss: 3730847212.088889
          vf_explained_var: -1.1126200405442432e-07
          vf_loss: 3730847212.088889
    num_agent_steps_sampled: 1158000
    num_agent_steps_trained: 1158000
    num_steps_sampled: 1158000
    num_steps_trained: 1158000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1158,32918,1158000,-1.9896,-1.72,-2.05,198.96


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1159000
  custom_metrics: {}
  date: 2021-10-24_05-09-44
  done: false
  episode_len_mean: 199.24
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9924000000000006
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2801
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.912133633635517e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03196049233277639
          total_loss: 3204844455.822222
          vf_explained_var: -1.1920928955078125e-07
          vf_loss: 3204844455.822222
    num_agent_steps_sampled: 1159000
    num_agent_steps_trained: 1159000
    num_steps_sampled: 1159000
    num_steps_trained: 1159000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1159,32968,1159000,-1.9924,-1.73,-2.05,199.24


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1160000
  custom_metrics: {}
  date: 2021-10-24_05-10-33
  done: false
  episode_len_mean: 199.24
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9924000000000008
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2806
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.560668168177585e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03196042527755102
          total_loss: 3201522323.911111
          vf_explained_var: -4.7683716530855236e-08
          vf_loss: 3201522323.911111
    num_agent_steps_sampled: 1160000
    num_agent_steps_trained: 1160000
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1160,33016.2,1160000,-1.9924,-1.73,-2.05,199.24


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1161000
  custom_metrics: {}
  date: 2021-10-24_05-11-21
  done: false
  episode_len_mean: 199.24
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9924000000000008
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2811
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.7803340840887924e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031960458805163704
          total_loss: 3198411053.5111113
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 3198411053.5111113
    num_agent_steps_sampled: 1161000
    num_agent_steps_trained: 1161000
    num_steps_sampled: 1161000
    num_steps_trained: 1161000
  iterations_since_res

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1161,33064.8,1161000,-1.9924,-1.73,-2.05,199.24


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1162000
  custom_metrics: {}
  date: 2021-10-24_05-12-12
  done: false
  episode_len_mean: 199.24
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9924000000000008
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2816
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3901670420443962e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03196038926641146
          total_loss: 3195255361.422222
          vf_explained_var: 7.947286206899662e-08
          vf_loss: 3195255361.422222
    num_agent_steps_sampled: 1162000
    num_agent_steps_trained: 1162000
    num_steps_sampled: 1162000
    num_steps_trained: 1162000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1162,33115.2,1162000,-1.9924,-1.73,-2.05,199.24




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1163000
  custom_metrics: {}
  date: 2021-10-24_05-13-18
  done: false
  episode_len_mean: 199.01
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9901000000000009
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2821
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1950835210221981e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.00800502627260155
          entropy_coeff: 0.009999999999999998
          kl: 0.00010545805918001052
          policy_loss: -0.19044958187474145
          total_loss: 112588000568.88889
          vf_explained_var: -0.3333333432674408
          vf_loss: 112588000568.88889
    num_agent_steps_sampled: 1163000
    num_agent_steps_trained: 1163000
    num_steps_sampled: 1163000
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1163,33181.3,1163000,-1.9901,-1.73,-2.05,199.01


[2m[36m(pid=57533)[0m   return np.nanmean(tower_data)


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1164000
  custom_metrics: {}
  date: 2021-10-24_05-14-12
  done: false
  episode_len_mean: 199.07
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.990700000000001
  episode_reward_min: -2.0500000000000003
  episodes_this_iter: 5
  episodes_total: 2826
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.9754176051109905e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.029688040850063165
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.2609321278002527
          total_loss: 3158611634858.6665
          vf_explained_var: -0.20827975869178772
          vf_loss: 1066564210646.7556
    num_agent_steps_sampled: 1164000
    num_agent_steps_trained: 1164000
    num_steps_sampled: 1164000
    num_steps_trained: 1164000
  iteratio

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1164,33235.3,1164000,-1.9907,-1.73,-2.05,199.07




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1165000
  custom_metrics: {}
  date: 2021-10-24_05-15-02
  done: false
  episode_len_mean: 199.33
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9933000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2831
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.963126407666484e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.03232093349927002
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.14378186530537077
          total_loss: 14242373295581.867
          vf_explained_var: 0.5168465375900269
          vf_loss: 13692409601501.867
    num_agent_steps_sampled: 1165000
    num_agent_steps_trained: 1165000
    num_steps_sampled: 1165000
    num_steps_trained: 1165000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1165,33285.6,1165000,-1.9933,-1.73,-2.04,199.33




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1166000
  custom_metrics: {}
  date: 2021-10-24_05-15-54
  done: false
  episode_len_mean: 199.44
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9944000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2836
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.3444689611499732e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.058819650982817016
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.21369437906477187
          total_loss: 0.6017192820707957
          vf_explained_var: -0.15521138906478882
          vf_loss: 413798300285.30896
    num_agent_steps_sampled: 1166000
    num_agent_steps_trained: 1166000
    num_steps_sampled: 1166000
    num_steps_trained: 1166000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1166,33337.4,1166000,-1.9944,-1.73,-2.04,199.44


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1167000
  custom_metrics: {}
  date: 2021-10-24_05-16-45
  done: false
  episode_len_mean: 199.49
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9949000000000006
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2841
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0167034417249597e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.04358272287580702
          entropy_coeff: 0.009999999999999998
          kl: 0.00257610169440849
          policy_loss: -0.05749333666430579
          total_loss: 605719956202860.1
          vf_explained_var: -0.02645747922360897
          vf_loss: 605719956202860.1
    num_agent_steps_sampled: 1167000
    num_agent_steps_trained: 1167000
    num_steps_sampled: 1167000
    num_steps_trained: 116

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1167,33388.5,1167000,-1.9949,-1.73,-2.04,199.49




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1168000
  custom_metrics: {}
  date: 2021-10-24_05-17-37
  done: false
  episode_len_mean: 199.59
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.995900000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2846
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0083517208624799e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.11509581783579456
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.02365934716330634
          total_loss: 369.17731550335884
          vf_explained_var: -0.9888888597488403
          vf_loss: 62397004707.85722
    num_agent_steps_sampled: 1168000
    num_agent_steps_trained: 1168000
    num_steps_sampled: 1168000
    num_steps_trained: 1168000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1168,33440,1168000,-1.9959,-1.73,-2.04,199.59




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1169000
  custom_metrics: {}
  date: 2021-10-24_05-18-44
  done: false
  episode_len_mean: 199.37
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993700000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2851
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5125275812937198e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.11749343454009957
          entropy_coeff: 0.009999999999999998
          kl: 0.024332101783990263
          policy_loss: 0.11563779794507556
          total_loss: 711022244224955.8
          vf_explained_var: -0.20726360380649567
          vf_loss: 711022244224955.8
    num_agent_steps_sampled: 1169000
    num_agent_steps_trained: 1169000
    num_steps_sampled: 1169000
    num_steps_trained: 1169

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1169,33507.8,1169000,-1.9937,-1.73,-2.04,199.37


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1170000
  custom_metrics: {}
  date: 2021-10-24_05-19-33
  done: false
  episode_len_mean: 199.47
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9947000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2856
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.268791371940579e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.27494298443198206
          entropy_coeff: 0.009999999999999998
          kl: 0.05524395838108628
          policy_loss: 0.06883764879571067
          total_loss: 20875765845560.89
          vf_explained_var: 0.6638044118881226
          vf_loss: 20875765845560.89
    num_agent_steps_sampled: 1170000
    num_agent_steps_trained: 1170000
    num_steps_sampled: 1170000
    num_steps_trained: 1170000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1170,33556,1170000,-1.9947,-1.73,-2.04,199.47


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1171000
  custom_metrics: {}
  date: 2021-10-24_05-20-22
  done: false
  episode_len_mean: 199.79
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9979000000000007
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2861
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.403187057910868e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.22529297628336484
          entropy_coeff: 0.009999999999999998
          kl: 0.0068438027749232865
          policy_loss: -0.004910387181573444
          total_loss: 23470226159843.555
          vf_explained_var: 0.3263261020183563
          vf_loss: 23470226159843.555
    num_agent_steps_sampled: 1171000
    num_agent_steps_trained: 1171000
    num_steps_sampled: 1171000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1171,33605.6,1171000,-1.9979,-1.73,-2.04,199.79


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1172000
  custom_metrics: {}
  date: 2021-10-24_05-21-09
  done: false
  episode_len_mean: 199.86
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.998600000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 2866
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.403187057910868e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.298720495402813
          entropy_coeff: 0.009999999999999998
          kl: 0.0193542680065183
          policy_loss: -0.18078081789943906
          total_loss: 2664247188957.8667
          vf_explained_var: 0.027403254061937332
          vf_loss: 2664247188957.8667
    num_agent_steps_sampled: 1172000
    num_agent_steps_trained: 1172000
    num_steps_sampled: 1172000
    num_steps_trained: 117200

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1172,33652.8,1172000,-1.9986,-1.73,-2.04,199.86




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1173000
  custom_metrics: {}
  date: 2021-10-24_05-21-58
  done: false
  episode_len_mean: 199.99
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9999000000000011
  episode_reward_min: -2.0799999999999996
  episodes_this_iter: 5
  episodes_total: 2871
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.403187057910868e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.44215335067775513
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.010102012732790575
          total_loss: 4576276853555.2
          vf_explained_var: -0.41342484951019287
          vf_loss: 78807247434547.2
    num_agent_steps_sampled: 1173000
    num_agent_steps_trained: 1173000
    num_steps_sampled: 1173000
    num_steps_trained: 1173000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1173,33701.1,1173000,-1.9999,-1.73,-2.08,199.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1174000
  custom_metrics: {}
  date: 2021-10-24_05-22-50
  done: false
  episode_len_mean: 200.07
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.000700000000001
  episode_reward_min: -2.0799999999999996
  episodes_this_iter: 5
  episodes_total: 2876
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.104780586866302e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.2861530779964394
          entropy_coeff: 0.009999999999999998
          kl: 0.012186220135766583
          policy_loss: -0.15339071469174492
          total_loss: 3744908291458.844
          vf_explained_var: -0.7881355881690979
          vf_loss: 3744908291458.844
    num_agent_steps_sampled: 1174000
    num_agent_steps_trained: 1174000
    num_steps_sampled: 1174000
    num_steps_trained: 117400

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1174,33753.4,1174000,-2.0007,-1.73,-2.08,200.07




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1175000
  custom_metrics: {}
  date: 2021-10-24_05-24-00
  done: false
  episode_len_mean: 199.86
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.9986000000000015
  episode_reward_min: -2.0799999999999996
  episodes_this_iter: 5
  episodes_total: 2881
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.104780586866302e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.4615424113141166
          entropy_coeff: 0.009999999999999998
          kl: 0.003386736739830967
          policy_loss: 0.008792402760850058
          total_loss: 390269669584987.0
          vf_explained_var: -1.0
          vf_loss: 390269669584987.0
    num_agent_steps_sampled: 1175000
    num_agent_steps_trained: 1175000
    num_steps_sampled: 1175000
    num_steps_trained: 1175000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1175,33822.9,1175000,-1.9986,-1.71,-2.08,199.86




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1176000
  custom_metrics: {}
  date: 2021-10-24_05-24-48
  done: false
  episode_len_mean: 199.99
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.9999000000000011
  episode_reward_min: -2.0799999999999996
  episodes_this_iter: 5
  episodes_total: 2886
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.552390293433151e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.656573611829016
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.008176411564151446
          total_loss: 49075696242961.07
          vf_explained_var: -1.0
          vf_loss: 37473686010083.555
    num_agent_steps_sampled: 1176000
    num_agent_steps_trained: 1176000
    num_steps_sampled: 1176000
    num_steps_trained: 1176000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1176,33871.6,1176000,-1.9999,-1.71,-2.08,199.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1177000
  custom_metrics: {}
  date: 2021-10-24_05-25-36
  done: false
  episode_len_mean: 200.41
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0041000000000007
  episode_reward_min: -2.0799999999999996
  episodes_this_iter: 5
  episodes_total: 2891
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.828585440149727e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.4484525892469618
          entropy_coeff: 0.009999999999999998
          kl: 0.0022520059489584267
          policy_loss: -0.03458118182089594
          total_loss: 17002240930247.111
          vf_explained_var: 0.050939273089170456
          vf_loss: 17002240930247.111
    num_agent_steps_sampled: 1177000
    num_agent_steps_trained: 1177000
    num_steps_sampled: 1177000
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1177,33918.8,1177000,-2.0041,-1.71,-2.08,200.41


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1178000
  custom_metrics: {}
  date: 2021-10-24_05-26-25
  done: false
  episode_len_mean: 200.54
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0054000000000007
  episode_reward_min: -2.0799999999999996
  episodes_this_iter: 5
  episodes_total: 2896
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9142927200748635e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.9310984747277365
          entropy_coeff: 0.009999999999999998
          kl: 0.0657981164085969
          policy_loss: 0.011463091046445899
          total_loss: 30642820593163.38
          vf_explained_var: -0.5345017910003662
          vf_loss: 30642820593163.38
    num_agent_steps_sampled: 1178000
    num_agent_steps_trained: 1178000
    num_steps_sampled: 1178000
    num_steps_trained: 117800

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1178,33968.5,1178000,-2.0054,-1.71,-2.08,200.54


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1179000
  custom_metrics: {}
  date: 2021-10-24_05-27-13
  done: false
  episode_len_mean: 201.01
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.010100000000001
  episode_reward_min: -2.239999999999996
  episodes_this_iter: 4
  episodes_total: 2900
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.871439080112295e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.9002611617247264
          entropy_coeff: 0.009999999999999998
          kl: 0.006534421171556895
          policy_loss: -0.011962875061564975
          total_loss: 6458421117656.178
          vf_explained_var: 0.271981418132782
          vf_loss: 6458421117656.178
    num_agent_steps_sampled: 1179000
    num_agent_steps_trained: 1179000
    num_steps_sampled: 1179000
    num_steps_trained: 1179000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1179,34015.9,1179000,-2.0101,-1.71,-2.24,201.01




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1180000
  custom_metrics: {}
  date: 2021-10-24_05-27-59
  done: false
  episode_len_mean: 201.49
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.014900000000001
  episode_reward_min: -2.239999999999996
  episodes_this_iter: 5
  episodes_total: 2905
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.871439080112295e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.5013590070936415
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.04213976346784168
          total_loss: 16574378899319.467
          vf_explained_var: -0.52232426404953
          vf_loss: 6526253368024.178
    num_agent_steps_sampled: 1180000
    num_agent_steps_trained: 1180000
    num_steps_sampled: 1180000
    num_steps_trained: 1180000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1180,34062,1180000,-2.0149,-1.71,-2.24,201.49




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1181000
  custom_metrics: {}
  date: 2021-10-24_05-28-41
  done: false
  episode_len_mean: 202.99
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0299000000000005
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 4
  episodes_total: 2909
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3071586201684435e-49
          cur_lr: 5.000000000000001e-05
          entropy: 1.1349005195829602
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.00051361211679048
          total_loss: 70263009968128.0
          vf_explained_var: -1.0
          vf_loss: 46613497800294.4
    num_agent_steps_sampled: 1181000
    num_agent_steps_trained: 1181000
    num_steps_sampled: 1181000
    num_steps_trained: 1181000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1181,34103.7,1181000,-2.0299,-1.71,-2.45,202.99




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1182000
  custom_metrics: {}
  date: 2021-10-24_05-29-50
  done: false
  episode_len_mean: 202.95
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0295000000000005
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2914
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.460737930252667e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.0321223850051562
          entropy_coeff: 0.009999999999999998
          kl: 0.04945251809226142
          policy_loss: 0.02132894554071956
          total_loss: 2123339841900.0889
          vf_explained_var: 0.050995923578739166
          vf_loss: 2123339841900.0889
    num_agent_steps_sampled: 1182000
    num_agent_steps_trained: 1182000
    num_steps_sampled: 1182000
    num_steps_trained: 1182

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1182,34173,1182000,-2.0295,-1.71,-2.45,202.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1183000
  custom_metrics: {}
  date: 2021-10-24_05-30-39
  done: false
  episode_len_mean: 202.95
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0295
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2919
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.691106895378996e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.0253692294160525
          total_loss: 4158062606.2222223
          vf_explained_var: 7.814831093355679e-08
          vf_loss: 4158062606.2222223
    num_agent_steps_sampled: 1183000
    num_agent_steps_trained: 1183000
    num_steps_sampled: 1183000
    num_steps_trained: 1183000
  iterations_since_restore: 1183
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1183,34221.7,1183000,-2.0295,-1.71,-2.45,202.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1184000
  custom_metrics: {}
  date: 2021-10-24_05-31-27
  done: false
  episode_len_mean: 203.14
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0314000000000005
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2924
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.845553447689498e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.025369213273127873
          total_loss: 3495572701.866667
          vf_explained_var: 7.285012060265217e-08
          vf_loss: 3495572701.866667
    num_agent_steps_sampled: 1184000
    num_agent_steps_trained: 1184000
    num_steps_sampled: 1184000
    num_steps_trained: 1184000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1184,34270.2,1184000,-2.0314,-1.71,-2.45,203.14


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1185000
  custom_metrics: {}
  date: 2021-10-24_05-32-17
  done: false
  episode_len_mean: 203.09
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0309
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2929
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.422776723844749e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.02536920706431071
          total_loss: 3494558151.111111
          vf_explained_var: -3.5762788286319847e-08
          vf_loss: 3494558151.111111
    num_agent_steps_sampled: 1185000
    num_agent_steps_trained: 1185000
    num_steps_sampled: 1185000
    num_steps_trained: 1185000
  iterations_since_restore: 1185
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1185,34319.9,1185000,-2.0309,-1.71,-2.45,203.09


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1186000
  custom_metrics: {}
  date: 2021-10-24_05-33-05
  done: false
  episode_len_mean: 203.02
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0302000000000002
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2934
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2113883619223745e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.025369287778933842
          total_loss: 3493709593.6
          vf_explained_var: -1.2715658215256553e-07
          vf_loss: 3493709593.6
    num_agent_steps_sampled: 1186000
    num_agent_steps_trained: 1186000
    num_steps_sampled: 1186000
    num_steps_trained: 1186000
  iterations_since_restore: 1186


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1186,34368.1,1186000,-2.0302,-1.71,-2.45,203.02




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1187000
  custom_metrics: {}
  date: 2021-10-24_05-34-13
  done: false
  episode_len_mean: 202.66
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0266
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 6
  episodes_total: 2940
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.056941809611873e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.07357148577769597
          total_loss: 3492509132.8
          vf_explained_var: 0.0
          vf_loss: 3492509132.8
    num_agent_steps_sampled: 1187000
    num_agent_steps_trained: 1187000
    num_steps_sampled: 1187000
    num_steps_trained: 1187000
  iterations_since_restore: 1187
  node_ip: 172.17.0.2
  num_health

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1187,34436.1,1187000,-2.0266,-1.71,-2.45,202.66


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1188000
  custom_metrics: {}
  date: 2021-10-24_05-35-05
  done: false
  episode_len_mean: 202.59
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0259000000000005
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2945
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.0284709048059363e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.04038656875491142
          total_loss: 33988055458702.223
          vf_explained_var: 8.123252337099984e-05
          vf_loss: 33988055458702.223
    num_agent_steps_sampled: 1188000
    num_agent_steps_trained: 1188000
    num_steps_sampled: 1188000
    num_steps_trained: 1188000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1188,34487.5,1188000,-2.0259,-1.71,-2.45,202.59


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1189000
  custom_metrics: {}
  date: 2021-10-24_05-35-53
  done: false
  episode_len_mean: 202.79
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0279000000000003
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2950
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5142354524029681e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.04041230802734693
          total_loss: 4396105557.333333
          vf_explained_var: -7.152557657263969e-08
          vf_loss: 4396105557.333333
    num_agent_steps_sampled: 1189000
    num_agent_steps_trained: 1189000
    num_steps_sampled: 1189000
    num_steps_trained: 1189000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1189,34536.2,1189000,-2.0279,-1.71,-2.45,202.79


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1190000
  custom_metrics: {}
  date: 2021-10-24_05-36-44
  done: false
  episode_len_mean: 202.69
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0269000000000004
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2955
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.571177262014841e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.04041222731272379
          total_loss: 4422035498.666667
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 4422035498.666667
    num_agent_steps_sampled: 1190000
    num_agent_steps_trained: 1190000
    num_steps_sampled: 1190000
    num_steps_trained: 1190000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1190,34587.1,1190000,-2.0269,-1.71,-2.45,202.69


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1191000
  custom_metrics: {}
  date: 2021-10-24_05-37-33
  done: false
  episode_len_mean: 202.64
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0264000000000006
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2960
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7855886310074204e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.040412284433841705
          total_loss: 4420030526.577778
          vf_explained_var: -9.934107225717526e-08
          vf_loss: 4420030526.577778
    num_agent_steps_sampled: 1191000
    num_agent_steps_trained: 1191000
    num_steps_sampled: 1191000
    num_steps_trained: 1191000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1191,34635.6,1191000,-2.0264,-1.71,-2.45,202.64


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1192000
  custom_metrics: {}
  date: 2021-10-24_05-38-22
  done: false
  episode_len_mean: 202.57
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0257
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2965
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8927943155037102e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.04041230926911036
          total_loss: 4418233369.6
          vf_explained_var: 2.2517310682701464e-08
          vf_loss: 4418233369.6
    num_agent_steps_sampled: 1192000
    num_agent_steps_trained: 1192000
    num_steps_sampled: 1192000
    num_steps_trained: 1192000
  iterations_since_restore: 1192
  node_ip: 172.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1192,34685.1,1192000,-2.0257,-1.71,-2.45,202.57




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1193000
  custom_metrics: {}
  date: 2021-10-24_05-39-34
  done: false
  episode_len_mean: 202.18
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0218000000000003
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2970
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.463971577518551e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.09386775890986125
          total_loss: 4416376908.8
          vf_explained_var: 1.1920928955078125e-07
          vf_loss: 4416376908.8
    num_agent_steps_sampled: 1193000
    num_agent_steps_trained: 1193000
    num_steps_sampled: 1193000
    num_steps_trained: 1193000
  iterations_since_restore: 1193
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1193,34756.6,1193000,-2.0218,-1.71,-2.45,202.18


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1194000
  custom_metrics: {}
  date: 2021-10-24_05-40-24
  done: false
  episode_len_mean: 202.55
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0255000000000005
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 4
  episodes_total: 2974
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.7319857887592755e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.8143980450100369
          entropy_coeff: 0.009999999999999998
          kl: 0.010474649346571357
          policy_loss: -0.07932994883093569
          total_loss: 1.4649201725083922e+16
          vf_explained_var: 0.08083780854940414
          vf_loss: 1.4649201725083922e+16
    num_agent_steps_sampled: 1194000
    num_agent_steps_trained: 1194000
    num_steps_sampled: 1194000
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1194,34807,1194000,-2.0255,-1.71,-2.45,202.55




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1195000
  custom_metrics: {}
  date: 2021-10-24_05-41-12
  done: false
  episode_len_mean: 203.04
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0303999999999998
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2979
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.7319857887592755e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.6459961344798406
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.021175003920992214
          total_loss: 7881348830549.333
          vf_explained_var: 0.06714830547571182
          vf_loss: 21271032376706.844
    num_agent_steps_sampled: 1195000
    num_agent_steps_trained: 1195000
    num_steps_sampled: 1195000
    num_steps_trained: 1195000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1195,34855.1,1195000,-2.0304,-1.71,-2.45,203.04




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1196000
  custom_metrics: {}
  date: 2021-10-24_05-41-57
  done: false
  episode_len_mean: 204.54
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0454
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2984
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.097978683138918e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.6858391798204846
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.0072732173320319915
          total_loss: 56782175076352.0
          vf_explained_var: -1.0
          vf_loss: 35735103508024.89
    num_agent_steps_sampled: 1196000
    num_agent_steps_trained: 1196000
    num_steps_sampled: 1196000
    num_steps_trained: 1196000
  iterations_since_restore: 1196
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1196,34900,1196000,-2.0454,-1.73,-2.45,204.54




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1197000
  custom_metrics: {}
  date: 2021-10-24_05-42-41
  done: false
  episode_len_mean: 205.4
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0540000000000003
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 4
  episodes_total: 2988
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0646968024708374e-51
          cur_lr: 5.000000000000001e-05
          entropy: 1.1267058531443277
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.028738055585159197
          total_loss: 277791169839104.0
          vf_explained_var: -1.0
          vf_loss: 128720370861488.36
    num_agent_steps_sampled: 1197000
    num_agent_steps_trained: 1197000
    num_steps_sampled: 1197000
    num_steps_trained: 1197000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1197,34943.9,1197000,-2.054,-1.73,-2.45,205.4


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1198000
  custom_metrics: {}
  date: 2021-10-24_05-43-31
  done: false
  episode_len_mean: 205.38
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0538
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2993
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.5970452037062557e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.32796466880374486
          entropy_coeff: 0.009999999999999998
          kl: 0.2719817121823629
          policy_loss: -0.09370480974515279
          total_loss: 5259398938868.622
          vf_explained_var: 0.28298839926719666
          vf_loss: 5259398938868.622
    num_agent_steps_sampled: 1198000
    num_agent_steps_trained: 1198000
    num_steps_sampled: 1198000
    num_steps_trained: 1198000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1198,34993.3,1198000,-2.0538,-1.73,-2.45,205.38


[2m[36m(pid=57533)[0m   lambda *s: None if s[0] is None else np.nanmean(s, axis=0),


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1199000
  custom_metrics: {}
  date: 2021-10-24_05-44-16
  done: false
  episode_len_mean: 205.96
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0596
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 2998
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.3955678055593843e-51
          cur_lr: 5.000000000000001e-05
          entropy: 1.0974616097079384
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.024183305373622312
          total_loss: .nan
          vf_explained_var: -0.8162588477134705
          vf_loss: 1268413675390020.2
    num_agent_steps_sampled: 1199000
    num_agent_steps_trained: 1199000
    num_steps_sampled: 1199000
    num_steps_trained: 1199000
  iterations_since_restore: 1199
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1199,35038.8,1199000,-2.0596,-1.73,-2.45,205.96




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1200000
  custom_metrics: {}
  date: 2021-10-24_05-45-24
  done: false
  episode_len_mean: 205.3
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.053
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 3003
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.593351708339076e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.22750431100527446
          entropy_coeff: 0.009999999999999998
          kl: 0.18435453904999627
          policy_loss: 0.08330292966630724
          total_loss: 2486523357957.689
          vf_explained_var: 0.27838942408561707
          vf_loss: 2486523357957.689
    num_agent_steps_sampled: 1200000
    num_agent_steps_trained: 1200000
    num_steps_sampled: 1200000
    num_steps_trained: 1200000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1200,35106.8,1200000,-2.053,-1.73,-2.45,205.3


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1201000
  custom_metrics: {}
  date: 2021-10-24_05-46-15
  done: false
  episode_len_mean: 203.95
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0395000000000003
  episode_reward_min: -2.4499999999999917
  episodes_this_iter: 5
  episodes_total: 3008
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.390027562508616e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03242552777131399
          total_loss: 3265785722743.467
          vf_explained_var: -3.510051271859993e-08
          vf_loss: 3265785722743.467
    num_agent_steps_sampled: 1201000
    num_agent_steps_trained: 1201000
    num_steps_sampled: 1201000
    num_steps_trained: 1201000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1201,35157.2,1201000,-2.0395,-1.73,-2.45,203.95


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1202000
  custom_metrics: {}
  date: 2021-10-24_05-47-05
  done: false
  episode_len_mean: 203.54
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0354
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3013
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.695013781254308e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03242558737595876
          total_loss: 6998524074.666667
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 6998524074.666667
    num_agent_steps_sampled: 1202000
    num_agent_steps_trained: 1202000
    num_steps_sampled: 1202000
    num_steps_trained: 1202000
  iterations_since_restore: 1202
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1202,35207.6,1202000,-2.0354,-1.73,-2.28,203.54


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1203000
  custom_metrics: {}
  date: 2021-10-24_05-47-55
  done: false
  episode_len_mean: 203.54
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0354
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3018
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.347506890627154e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.0324254035949707
          total_loss: 6976435620.9777775
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 6976435620.9777775
    num_agent_steps_sampled: 1203000
    num_agent_steps_trained: 1203000
    num_steps_sampled: 1203000
    num_steps_trained: 1203000
  iterations_since_restore: 1203
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1203,35257.6,1203000,-2.0354,-1.73,-2.28,203.54


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1204000
  custom_metrics: {}
  date: 2021-10-24_05-48-43
  done: false
  episode_len_mean: 203.54
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0354
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3023
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.73753445313577e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.032425651947657265
          total_loss: 6974563919.644444
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 6974563919.644444
    num_agent_steps_sampled: 1204000
    num_agent_steps_trained: 1204000
    num_steps_sampled: 1204000
    num_steps_trained: 1204000
  iterations_since_restore: 1204
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1204,35305.6,1204000,-2.0354,-1.73,-2.28,203.54


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1205000
  custom_metrics: {}
  date: 2021-10-24_05-49-32
  done: false
  episode_len_mean: 203.54
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0354
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3028
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.368767226567885e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03242544581492742
          total_loss: 6973651126.044444
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 6973651126.044444
    num_agent_steps_sampled: 1205000
    num_agent_steps_trained: 1205000
    num_steps_sampled: 1205000
    num_steps_trained: 1205000
  iterations_since_restore: 1205
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1205,35354.7,1205000,-2.0354,-1.73,-2.28,203.54




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1206000
  custom_metrics: {}
  date: 2021-10-24_05-50-40
  done: false
  episode_len_mean: 203.3
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.033
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3033
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6843836132839426e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.5305430889129639
          entropy_coeff: 0.009999999999999998
          kl: 0.3916638996866014
          policy_loss: -0.012598193602429496
          total_loss: 6507095116734811.0
          vf_explained_var: -0.012262620031833649
          vf_loss: 6507095116734811.0
    num_agent_steps_sampled: 1206000
    num_agent_steps_trained: 1206000
    num_steps_sampled: 1206000
    num_steps_trained: 1206000
  itera

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1206,35422.9,1206000,-2.033,-1.73,-2.28,203.3




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1207000
  custom_metrics: {}
  date: 2021-10-24_05-51-27
  done: false
  episode_len_mean: 203.94
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0393999999999997
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 4
  episodes_total: 3037
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.5265754199259124e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.7556398881806268
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.023001928379138312
          total_loss: .nan
          vf_explained_var: -0.7868529558181763
          vf_loss: 169458283039948.8
    num_agent_steps_sampled: 1207000
    num_agent_steps_trained: 1207000
    num_steps_sampled: 1207000
    num_steps_trained: 1207000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1207,35469.3,1207000,-2.0394,-1.73,-2.28,203.94


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1208000
  custom_metrics: {}
  date: 2021-10-24_05-52-16
  done: false
  episode_len_mean: 204.32
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0432
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3042
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.789863129888869e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.026343743006388348
          total_loss: 117939080760.88889
          vf_explained_var: 8.54333208621938e-08
          vf_loss: 117939080760.88889
    num_agent_steps_sampled: 1208000
    num_agent_steps_trained: 1208000
    num_steps_sampled: 1208000
    num_steps_trained: 1208000
  iterations_since_restore: 1208
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1208,35518.3,1208000,-2.0432,-1.73,-2.28,204.32


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1209000
  custom_metrics: {}
  date: 2021-10-24_05-53-06
  done: false
  episode_len_mean: 204.32
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0432
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3047
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8949315649444344e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.026343730588754017
          total_loss: 5885904725.333333
          vf_explained_var: 2.7153227222243004e-08
          vf_loss: 5885904725.333333
    num_agent_steps_sampled: 1209000
    num_agent_steps_trained: 1209000
    num_steps_sampled: 1209000
    num_steps_trained: 1209000
  iterations_since_restore: 1209
  n

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1209,35568.2,1209000,-2.0432,-1.73,-2.28,204.32


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1210000
  custom_metrics: {}
  date: 2021-10-24_05-53-55
  done: false
  episode_len_mean: 204.32
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0432
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3052
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.474657824722172e-53
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.026343760391076405
          total_loss: 5779677138.488889
          vf_explained_var: -5.960464477539063e-08
          vf_loss: 5779677138.488889
    num_agent_steps_sampled: 1210000
    num_agent_steps_trained: 1210000
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
  iterations_since_restore: 1210
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1210,35617.5,1210000,-2.0432,-1.73,-2.28,204.32


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1211000
  custom_metrics: {}
  date: 2021-10-24_05-54-45
  done: false
  episode_len_mean: 204.32
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.0432
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3057
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.737328912361086e-53
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.02634381006161372
          total_loss: 5778528335.644444
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 5778528335.644444
    num_agent_steps_sampled: 1211000
    num_agent_steps_trained: 1211000
    num_steps_sampled: 1211000
    num_steps_trained: 1211000
  iterations_since_restore: 1211
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1211,35667.3,1211000,-2.0432,-1.73,-2.28,204.32




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1212000
  custom_metrics: {}
  date: 2021-10-24_05-55-54
  done: false
  episode_len_mean: 204.07
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0406999999999997
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 6
  episodes_total: 3063
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.368664456180543e-53
          cur_lr: 5.000000000000001e-05
          entropy: 0.02312212081419097
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.04866973600453801
          total_loss: 6990362709.333333
          vf_explained_var: -0.16514019668102264
          vf_loss: 18267675955.2
    num_agent_steps_sampled: 1212000
    num_agent_steps_trained: 1212000
    num_steps_sampled: 1212000
    num_steps_trained: 1212000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1212,35736.4,1212000,-2.0407,-1.71,-2.28,204.07


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1213000
  custom_metrics: {}
  date: 2021-10-24_05-56-43
  done: false
  episode_len_mean: 204.07
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0406999999999997
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3068
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5529966842708142e-53
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.05901974936326345
          total_loss: 9299007795.2
          vf_explained_var: 0.0
          vf_loss: 9299007795.2
    num_agent_steps_sampled: 1213000
    num_agent_steps_trained: 1213000
    num_steps_sampled: 1213000
    num_steps_trained: 1213000
  iterations_since_restore: 1213
  node_ip: 172.17.0.2


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1213,35784.8,1213000,-2.0407,-1.71,-2.28,204.07


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1214000
  custom_metrics: {}
  date: 2021-10-24_05-57-32
  done: false
  episode_len_mean: 204.02
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0402
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3073
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.7764983421354071e-53
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.059019800275564194
          total_loss: 6069808480.711111
          vf_explained_var: -5.960464477539063e-08
          vf_loss: 6069808480.711111
    num_agent_steps_sampled: 1214000
    num_agent_steps_trained: 1214000
    num_steps_sampled: 1214000
    num_steps_trained: 1214000
  iterations_since_restore: 1214
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1214,35833.8,1214000,-2.0402,-1.71,-2.28,204.02


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1215000
  custom_metrics: {}
  date: 2021-10-24_05-58-20
  done: false
  episode_len_mean: 203.45
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0345
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3078
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.882491710677036e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.05901985988020897
          total_loss: 6068305914.311111
          vf_explained_var: -5.960464477539063e-08
          vf_loss: 6068305914.311111
    num_agent_steps_sampled: 1215000
    num_agent_steps_trained: 1215000
    num_steps_sampled: 1215000
    num_steps_trained: 1215000
  iterations_since_restore: 1215
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1215,35882.3,1215000,-2.0345,-1.71,-2.28,203.45


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1216000
  custom_metrics: {}
  date: 2021-10-24_05-59-11
  done: false
  episode_len_mean: 202.28
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0228
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3083
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.441245855338518e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.05901998778184255
          total_loss: 6066781479.822222
          vf_explained_var: -1.1920928955078125e-07
          vf_loss: 6066781479.822222
    num_agent_steps_sampled: 1216000
    num_agent_steps_trained: 1216000
    num_steps_sampled: 1216000
    num_steps_trained: 1216000
  iterations_since_restore: 1216
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1216,35933.5,1216000,-2.0228,-1.71,-2.28,202.28


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1217000
  custom_metrics: {}
  date: 2021-10-24_06-00-03
  done: false
  episode_len_mean: 201.03
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0103000000000004
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3088
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.220622927669259e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.059019702176253
          total_loss: 6065429384.533334
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 6065429384.533334
    num_agent_steps_sampled: 1217000
    num_agent_steps_trained: 1217000
    num_steps_sampled: 1217000
    num_steps_trained: 1217000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1217,35985,1217000,-2.0103,-1.71,-2.28,201.03




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1218000
  custom_metrics: {}
  date: 2021-10-24_06-01-12
  done: false
  episode_len_mean: 200.67
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0067000000000004
  episode_reward_min: -2.2799999999999954
  episodes_this_iter: 5
  episodes_total: 3093
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1103114638346295e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.009245293546054098
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.028210252854559158
          total_loss: 7580973047.466666
          vf_explained_var: -0.3333333432674408
          vf_loss: 18138684478.577778
    num_agent_steps_sampled: 1218000
    num_agent_steps_trained: 1218000
    num_steps_sampled: 1218000
    num_steps_trained: 1218000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1218,36053.9,1218000,-2.0067,-1.71,-2.28,200.67


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1219000
  custom_metrics: {}
  date: 2021-10-24_06-02-00
  done: false
  episode_len_mean: 199.75
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.9975000000000005
  episode_reward_min: -2.199999999999997
  episodes_this_iter: 5
  episodes_total: 3098
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.665467195751945e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.0324559211730957
          total_loss: 10669434311.11111
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 10669434311.11111
    num_agent_steps_sampled: 1219000
    num_agent_steps_trained: 1219000
    num_steps_sampled: 1219000
    num_steps_trained: 1219000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1219,36102.4,1219000,-1.9975,-1.71,-2.2,199.75


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1220000
  custom_metrics: {}
  date: 2021-10-24_06-02-50
  done: false
  episode_len_mean: 200.01
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.000100000000001
  episode_reward_min: -2.199999999999997
  episodes_this_iter: 5
  episodes_total: 3103
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.327335978759726e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03245578954617182
          total_loss: 6960435859.911111
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 6960435859.911111
    num_agent_steps_sampled: 1220000
    num_agent_steps_trained: 1220000
    num_steps_sampled: 1220000
    num_steps_trained: 1220000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1220,36152.3,1220000,-2.0001,-1.71,-2.2,200.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1221000
  custom_metrics: {}
  date: 2021-10-24_06-03-40
  done: false
  episode_len_mean: 200.01
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0001000000000007
  episode_reward_min: -2.199999999999997
  episodes_this_iter: 5
  episodes_total: 3108
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.163667989379863e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.032455841700236
          total_loss: 6959301239.466666
          vf_explained_var: -1.1920928955078125e-07
          vf_loss: 6959301239.466666
    num_agent_steps_sampled: 1221000
    num_agent_steps_trained: 1221000
    num_steps_sampled: 1221000
    num_steps_trained: 1221000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1221,36202.5,1221000,-2.0001,-1.71,-2.2,200.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1222000
  custom_metrics: {}
  date: 2021-10-24_06-04-28
  done: false
  episode_len_mean: 200.01
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.000100000000001
  episode_reward_min: -2.199999999999997
  episodes_this_iter: 5
  episodes_total: 3113
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0818339946899314e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03245598326126734
          total_loss: 6956929228.8
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 6956929228.8
    num_agent_steps_sampled: 1222000
    num_agent_steps_trained: 1222000
    num_steps_sampled: 1222000
    num_steps_trained: 1222000
  iterations_since_restore: 1222
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1222,36250.2,1222000,-2.0001,-1.71,-2.2,200.01


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1223000
  custom_metrics: {}
  date: 2021-10-24_06-05-18
  done: false
  episode_len_mean: 200.01
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0001000000000007
  episode_reward_min: -2.199999999999997
  episodes_this_iter: 5
  episodes_total: 3118
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0409169973449657e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03245602548122406
          total_loss: 6954663463.822222
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 6954663463.822222
    num_agent_steps_sampled: 1223000
    num_agent_steps_trained: 1223000
    num_steps_sampled: 1223000
    num_steps_trained: 1223000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1223,36300.1,1223000,-2.0001,-1.71,-2.2,200.01




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1224000
  custom_metrics: {}
  date: 2021-10-24_06-06-24
  done: false
  episode_len_mean: 199.75
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.997500000000001
  episode_reward_min: -2.199999999999997
  episodes_this_iter: 5
  episodes_total: 3123
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.2045849867248285e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.019650463097625307
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.005098685622215271
          total_loss: 7333478843.733334
          vf_explained_var: -0.3333333432674408
          vf_loss: 19601925893.68889
    num_agent_steps_sampled: 1224000
    num_agent_steps_trained: 1224000
    num_steps_sampled: 1224000
    num_steps_trained: 1224000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1224,36366.5,1224000,-1.9975,-1.71,-2.2,199.75


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1225000
  custom_metrics: {}
  date: 2021-10-24_06-07-12
  done: false
  episode_len_mean: 199.75
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.9975000000000005
  episode_reward_min: -2.199999999999997
  episodes_this_iter: 5
  episodes_total: 3128
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.806877480087239e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03205187867085139
          total_loss: 10725154759.11111
          vf_explained_var: 5.960464477539063e-08
          vf_loss: 10725154759.11111
    num_agent_steps_sampled: 1225000
    num_agent_steps_trained: 1225000
    num_steps_sampled: 1225000
    num_steps_trained: 1225000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1225,36414.3,1225000,-1.9975,-1.71,-2.2,199.75


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1226000
  custom_metrics: {}
  date: 2021-10-24_06-08-06
  done: false
  episode_len_mean: 199.99
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.9999000000000007
  episode_reward_min: -2.199999999999997
  episodes_this_iter: 5
  episodes_total: 3133
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.9034387400436196e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.032051886121431984
          total_loss: 6909447793.777778
          vf_explained_var: -9.934107225717526e-08
          vf_loss: 6909447793.777778
    num_agent_steps_sampled: 1226000
    num_agent_steps_trained: 1226000
    num_steps_sampled: 1226000
    num_steps_trained: 1226000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1226,36467.9,1226000,-1.9999,-1.71,-2.2,199.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1227000
  custom_metrics: {}
  date: 2021-10-24_06-08-54
  done: false
  episode_len_mean: 199.23
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.9923000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 3138
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9517193700218098e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03205189605553945
          total_loss: 6906874919.822222
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 6906874919.822222
    num_agent_steps_sampled: 1227000
    num_agent_steps_trained: 1227000
    num_steps_sampled: 1227000
    num_steps_trained: 1227000
  iterations_since_resto

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1227,36515.6,1227000,-1.9923,-1.71,-2.04,199.23


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1228000
  custom_metrics: {}
  date: 2021-10-24_06-09-49
  done: false
  episode_len_mean: 199.23
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.992300000000001
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 3143
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.758596850109049e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03205192585786184
          total_loss: 6903995096.177778
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 6903995096.177778
    num_agent_steps_sampled: 1228000
    num_agent_steps_trained: 1228000
    num_steps_sampled: 1228000
    num_steps_trained: 1228000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1228,36571,1228000,-1.9923,-1.71,-2.04,199.23


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1229000
  custom_metrics: {}
  date: 2021-10-24_06-10-37
  done: false
  episode_len_mean: 199.23
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.9923000000000008
  episode_reward_min: -2.0400000000000005
  episodes_this_iter: 5
  episodes_total: 3148
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.8792984250545245e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03205195814371109
          total_loss: 6901105595.733334
          vf_explained_var: -5.960464477539063e-08
          vf_loss: 6901105595.733334
    num_agent_steps_sampled: 1229000
    num_agent_steps_trained: 1229000
    num_steps_sampled: 1229000
    num_steps_trained: 1229000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1229,36619.1,1229000,-1.9923,-1.71,-2.04,199.23




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1230000
  custom_metrics: {}
  date: 2021-10-24_06-11-43
  done: false
  episode_len_mean: 199.11
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.9911000000000008
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3153
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4396492125272622e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.02193146285911401
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0047144278883934024
          total_loss: 7033359189.333333
          vf_explained_var: -0.3333333432674408
          vf_loss: 28652084542.577778
    num_agent_steps_sampled: 1230000
    num_agent_steps_trained: 1230000
    num_steps_sampled: 1230000
    num_steps_trained: 1230000
  iterati

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1230,36685.1,1230000,-1.9911,-1.71,-2.15,199.11


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1231000
  custom_metrics: {}
  date: 2021-10-24_06-12-31
  done: false
  episode_len_mean: 199.11
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.991100000000001
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3158
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.6594738187908933e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03199508289496104
          total_loss: 12820862395.733334
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 12820862395.733334
    num_agent_steps_sampled: 1231000
    num_agent_steps_trained: 1231000
    num_steps_sampled: 1231000
    num_steps_trained: 1231000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1231,36733.1,1231000,-1.9911,-1.71,-2.15,199.11


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1232000
  custom_metrics: {}
  date: 2021-10-24_06-13-20
  done: false
  episode_len_mean: 199.36
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993600000000001
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3163
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.8297369093954467e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03199522693951925
          total_loss: 6882151896.177778
          vf_explained_var: 5.960464477539063e-08
          vf_loss: 6882151896.177778
    num_agent_steps_sampled: 1232000
    num_agent_steps_trained: 1232000
    num_steps_sampled: 1232000
    num_steps_trained: 1232000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1232,36781.8,1232000,-1.9936,-1.73,-2.15,199.36


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1233000
  custom_metrics: {}
  date: 2021-10-24_06-14-10
  done: false
  episode_len_mean: 199.36
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993600000000001
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3168
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.148684546977233e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.0319951723019282
          total_loss: 6874694274.844444
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 6874694274.844444
    num_agent_steps_sampled: 1233000
    num_agent_steps_trained: 1233000
    num_steps_sampled: 1233000
    num_steps_trained: 1233000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1233,36831.6,1233000,-1.9936,-1.73,-2.15,199.36


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1234000
  custom_metrics: {}
  date: 2021-10-24_06-14-59
  done: false
  episode_len_mean: 199.36
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.9936000000000007
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3173
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.574342273488617e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031995244324207306
          total_loss: 6868990441.244445
          vf_explained_var: -7.947286206899662e-08
          vf_loss: 6868990441.244445
    num_agent_steps_sampled: 1234000
    num_agent_steps_trained: 1234000
    num_steps_sampled: 1234000
    num_steps_trained: 1234000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1234,36880.3,1234000,-1.9936,-1.73,-2.15,199.36


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1235000
  custom_metrics: {}
  date: 2021-10-24_06-15-47
  done: false
  episode_len_mean: 199.36
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -1.993600000000001
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3178
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.2871711367443083e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031995187203089394
          total_loss: 6862794018.133333
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 6862794018.133333
    num_agent_steps_sampled: 1235000
    num_agent_steps_trained: 1235000
    num_steps_sampled: 1235000
    num_steps_trained: 1235000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1235,36928.6,1235000,-1.9936,-1.73,-2.15,199.36




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1236000
  custom_metrics: {}
  date: 2021-10-24_06-16-54
  done: false
  episode_len_mean: 199.12
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.991200000000001
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3183
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1435855683721542e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.014425094177325567
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.011024680568112268
          total_loss: 5727342225.066667
          vf_explained_var: -0.3333333432674408
          vf_loss: 23902213353.244446
    num_agent_steps_sampled: 1236000
    num_agent_steps_trained: 1236000
    num_steps_sampled: 1236000
    num_steps_trained: 1236000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1236,36996.1,1236000,-1.9912,-1.72,-2.15,199.12


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1237000
  custom_metrics: {}
  date: 2021-10-24_06-17-43
  done: false
  episode_len_mean: 199.12
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9912000000000012
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3188
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.715378352558231e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03192800283432007
          total_loss: 11800650911.288889
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 11800650911.288889
    num_agent_steps_sampled: 1237000
    num_agent_steps_trained: 1237000
    num_steps_sampled: 1237000
    num_steps_trained: 1237000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1237,37044.4,1237000,-1.9912,-1.72,-2.15,199.12


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1238000
  custom_metrics: {}
  date: 2021-10-24_06-18-31
  done: false
  episode_len_mean: 199.38
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9938000000000011
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3193
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.576891762791156e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031927997867266335
          total_loss: 6842221118.577778
          vf_explained_var: 0.0
          vf_loss: 6842221118.577778
    num_agent_steps_sampled: 1238000
    num_agent_steps_trained: 1238000
    num_steps_sampled: 1238000
    num_steps_trained: 1238000
  iterations_since_restore: 1238
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1238,37092.8,1238000,-1.9938,-1.72,-2.15,199.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1239000
  custom_metrics: {}
  date: 2021-10-24_06-19-20
  done: false
  episode_len_mean: 199.38
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9938000000000011
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3198
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.288445881395578e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03192802766958872
          total_loss: 6834340414.577778
          vf_explained_var: 1.9868215517249155e-08
          vf_loss: 6834340414.577778
    num_agent_steps_sampled: 1239000
    num_agent_steps_trained: 1239000
    num_steps_sampled: 1239000
    num_steps_trained: 1239000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1239,37141.8,1239000,-1.9938,-1.72,-2.15,199.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1240000
  custom_metrics: {}
  date: 2021-10-24_06-20-09
  done: false
  episode_len_mean: 199.38
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.993800000000001
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3203
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.144222940697789e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03192801276842753
          total_loss: 6829088768.0
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 6829088768.0
    num_agent_steps_sampled: 1240000
    num_agent_steps_trained: 1240000
    num_steps_sampled: 1240000
    num_steps_trained: 1240000
  iterations_since_restore: 1240
  no

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1240,37190.5,1240000,-1.9938,-1.72,-2.15,199.38


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1241000
  custom_metrics: {}
  date: 2021-10-24_06-20-58
  done: false
  episode_len_mean: 199.38
  episode_media: {}
  episode_reward_max: -1.7200000000000013
  episode_reward_mean: -1.9938000000000011
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3208
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0721114703488945e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03192813446124395
          total_loss: 6822639160.888889
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 6822639160.888889
    num_agent_steps_sampled: 1241000
    num_agent_steps_trained: 1241000
    num_steps_sampled: 1241000
    num_steps_trained: 1241000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1241,37239.3,1241000,-1.9938,-1.72,-2.15,199.38




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1242000
  custom_metrics: {}
  date: 2021-10-24_06-22-05
  done: false
  episode_len_mean: 199.11
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.9911000000000012
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3213
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.360557351744472e-60
          cur_lr: 5.000000000000001e-05
          entropy: 0.01870328262448311
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.024350632064872316
          total_loss: 5986408763.733334
          vf_explained_var: -0.3333333432674408
          vf_loss: 20141744748.08889
    num_agent_steps_sampled: 1242000
    num_agent_steps_trained: 1242000
    num_steps_sampled: 1242000
    num_steps_trained: 1242000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1242,37306.3,1242000,-1.9911,-1.71,-2.15,199.11


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1243000
  custom_metrics: {}
  date: 2021-10-24_06-22-52
  done: false
  episode_len_mean: 199.11
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.991100000000001
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3218
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.040836027616711e-60
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03175302967429161
          total_loss: 10386075960.88889
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 10386075960.88889
    num_agent_steps_sampled: 1243000
    num_agent_steps_trained: 1243000
    num_steps_sampled: 1243000
    num_steps_trained: 1243000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1243,37354,1243000,-1.9911,-1.71,-2.15,199.11


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1244000
  custom_metrics: {}
  date: 2021-10-24_06-23-41
  done: false
  episode_len_mean: 199.37
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.993700000000001
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3223
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.0204180138083554e-60
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03175296758611997
          total_loss: 6797045316.266666
          vf_explained_var: 3.973643103449831e-08
          vf_loss: 6797045316.266666
    num_agent_steps_sampled: 1244000
    num_agent_steps_trained: 1244000
    num_steps_sampled: 1244000
    num_steps_trained: 1244000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1244,37402.7,1244000,-1.9937,-1.71,-2.15,199.37


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1245000
  custom_metrics: {}
  date: 2021-10-24_06-24-30
  done: false
  episode_len_mean: 199.37
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.993700000000001
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3228
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.0102090069041777e-60
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03175300111373266
          total_loss: 6787994897.066667
          vf_explained_var: -3.973643103449831e-08
          vf_loss: 6787994897.066667
    num_agent_steps_sampled: 1245000
    num_agent_steps_trained: 1245000
    num_steps_sampled: 1245000
    num_steps_trained: 1245000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1245,37451.7,1245000,-1.9937,-1.71,-2.15,199.37


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1246000
  custom_metrics: {}
  date: 2021-10-24_06-25-19
  done: false
  episode_len_mean: 199.37
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.993700000000001
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3233
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0051045034520888e-60
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.031753056993087135
          total_loss: 6780546833.066667
          vf_explained_var: 0.0
          vf_loss: 6780546833.066667
    num_agent_steps_sampled: 1246000
    num_agent_steps_trained: 1246000
    num_steps_sampled: 1246000
    num_steps_trained: 1246000
  iterations_since_restore: 1246
  node_ip: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1246,37500.2,1246000,-1.9937,-1.71,-2.15,199.37


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1247000
  custom_metrics: {}
  date: 2021-10-24_06-26-08
  done: false
  episode_len_mean: 199.37
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.993700000000001
  episode_reward_min: -2.149999999999998
  episodes_this_iter: 5
  episodes_total: 3238
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.025522517260444e-61
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03175299117962519
          total_loss: 6772764501.333333
          vf_explained_var: -1.9868215517249155e-08
          vf_loss: 6772764501.333333
    num_agent_steps_sampled: 1247000
    num_agent_steps_trained: 1247000
    num_steps_sampled: 1247000
    num_steps_trained: 1247000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1247,37549.3,1247000,-1.9937,-1.71,-2.15,199.37




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1248000
  custom_metrics: {}
  date: 2021-10-24_06-27-13
  done: false
  episode_len_mean: 199.71
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -1.997100000000001
  episode_reward_min: -2.249999999999996
  episodes_this_iter: 5
  episodes_total: 3243
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.512761258630222e-61
          cur_lr: 5.000000000000001e-05
          entropy: 0.44222999608351127
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.025828727500306237
          total_loss: 1845896699778389.2
          vf_explained_var: -0.5821906924247742
          vf_loss: 1230630497336524.8
    num_agent_steps_sampled: 1248000
    num_agent_steps_trained: 1248000
    num_steps_sampled: 1248000
    num_steps_trained: 1248000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1248,37614.6,1248000,-1.9971,-1.71,-2.25,199.71




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1249000
  custom_metrics: {}
  date: 2021-10-24_06-28-03
  done: false
  episode_len_mean: 200.17
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0017000000000014
  episode_reward_min: -2.249999999999996
  episodes_this_iter: 5
  episodes_total: 3248
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7691418879453334e-61
          cur_lr: 5.000000000000001e-05
          entropy: 0.5584140761031045
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.02645536404516962
          total_loss: 293617576269141.3
          vf_explained_var: 0.43647605180740356
          vf_loss: 204342314285283.56
    num_agent_steps_sampled: 1249000
    num_agent_steps_trained: 1249000
    num_steps_sampled: 1249000
    num_steps_trained: 1249000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1249,37664.7,1249000,-2.0017,-1.71,-2.25,200.17


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1250000
  custom_metrics: {}
  date: 2021-10-24_06-28-52
  done: false
  episode_len_mean: 200.5
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.005000000000001
  episode_reward_min: -2.249999999999996
  episodes_this_iter: 5
  episodes_total: 3253
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.653712831917998e-61
          cur_lr: 5.000000000000001e-05
          entropy: 0.10344322808086873
          entropy_coeff: 0.009999999999999998
          kl: 0.0031257293210088795
          policy_loss: -0.017131240748696857
          total_loss: 314917504679936.0
          vf_explained_var: -0.044927146285772324
          vf_loss: 314917504679936.0
    num_agent_steps_sampled: 1250000
    num_agent_steps_trained: 1250000
    num_steps_sampled: 1250000
    num_steps_trained: 125

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1250,37713.7,1250000,-2.005,-1.71,-2.25,200.5


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1251000
  custom_metrics: {}
  date: 2021-10-24_06-29-44
  done: false
  episode_len_mean: 200.69
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0069000000000004
  episode_reward_min: -2.249999999999996
  episodes_this_iter: 5
  episodes_total: 3258
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.826856415958999e-61
          cur_lr: 5.000000000000001e-05
          entropy: 0.07878298655980163
          entropy_coeff: 0.009999999999999998
          kl: 0.0008870374929103006
          policy_loss: 0.022521452440155878
          total_loss: 539686296325.6889
          vf_explained_var: -0.7064577341079712
          vf_loss: 539686296325.6889
    num_agent_steps_sampled: 1251000
    num_agent_steps_trained: 1251000
    num_steps_sampled: 1251000
    num_steps_trained: 1251

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1251,37765.4,1251000,-2.0069,-1.71,-2.25,200.69


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1252000
  custom_metrics: {}
  date: 2021-10-24_06-30-34
  done: false
  episode_len_mean: 200.84
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0084000000000004
  episode_reward_min: -2.249999999999996
  episodes_this_iter: 4
  episodes_total: 3262
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4134282079794995e-61
          cur_lr: 5.000000000000001e-05
          entropy: 0.1378976084291935
          entropy_coeff: 0.009999999999999998
          kl: 0.004321786135352094
          policy_loss: -0.03491373699572351
          total_loss: 1041467113472.0
          vf_explained_var: -1.0
          vf_loss: 1041467113472.0
    num_agent_steps_sampled: 1252000
    num_agent_steps_trained: 1252000
    num_steps_sampled: 1252000
    num_steps_trained: 1252000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1252,37815.2,1252000,-2.0084,-1.71,-2.25,200.84


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1253000
  custom_metrics: {}
  date: 2021-10-24_06-31-24
  done: false
  episode_len_mean: 201.05
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.010500000000001
  episode_reward_min: -2.249999999999996
  episodes_this_iter: 5
  episodes_total: 3267
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.0671410398974975e-62
          cur_lr: 5.000000000000001e-05
          entropy: 0.27669965939389335
          entropy_coeff: 0.009999999999999998
          kl: 0.004387236721050196
          policy_loss: -0.024030725616547795
          total_loss: 81314424390360.17
          vf_explained_var: -1.0
          vf_loss: 81314424390360.17
    num_agent_steps_sampled: 1253000
    num_agent_steps_trained: 1253000
    num_steps_sampled: 1253000
    num_steps_trained: 1253000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1253,37865.3,1253000,-2.0105,-1.71,-2.25,201.05




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1254000
  custom_metrics: {}
  date: 2021-10-24_06-32-33
  done: false
  episode_len_mean: 200.99
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.0495000000000005
  episode_reward_min: -6.02999999999996
  episodes_this_iter: 5
  episodes_total: 3272
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.5335705199487487e-62
          cur_lr: 5.000000000000001e-05
          entropy: 0.21249699894752766
          entropy_coeff: 0.009999999999999998
          kl: 0.09514811200952458
          policy_loss: 0.05222104373905394
          total_loss: 3838665240484.9775
          vf_explained_var: -0.7433728575706482
          vf_loss: 3838665240484.9775
    num_agent_steps_sampled: 1254000
    num_agent_steps_trained: 1254000
    num_steps_sampled: 1254000
    num_steps_trained: 12540

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1254,37934.3,1254000,-2.0495,-1.71,-6.03,200.99




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1255000
  custom_metrics: {}
  date: 2021-10-24_06-33-20
  done: false
  episode_len_mean: 201.43
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.1033999999999997
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 5
  episodes_total: 3277
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.300355779923124e-62
          cur_lr: 5.000000000000001e-05
          entropy: 0.2354810236228837
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.0448852117276854
          total_loss: 16953653155157.334
          vf_explained_var: 0.12339229881763458
          vf_loss: 13690920559502.223
    num_agent_steps_sampled: 1255000
    num_agent_steps_trained: 1255000
    num_steps_sampled: 1255000
    num_steps_trained: 1255000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1255,37981.6,1255000,-2.1034,-1.71,-7.07,201.43




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1256000
  custom_metrics: {}
  date: 2021-10-24_06-34-11
  done: false
  episode_len_mean: 202.16
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.1107
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 5
  episodes_total: 3282
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.950533669884684e-62
          cur_lr: 5.000000000000001e-05
          entropy: 0.32368148416280745
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.049841869001587234
          total_loss: 7129678619170.134
          vf_explained_var: -0.7996549606323242
          vf_loss: 3443494670973.156
    num_agent_steps_sampled: 1256000
    num_agent_steps_trained: 1256000
    num_steps_sampled: 1256000
    num_steps_trained: 1256000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1256,38032.6,1256000,-2.1107,-1.71,-7.07,202.16


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1257000
  custom_metrics: {}
  date: 2021-10-24_06-35-00
  done: false
  episode_len_mean: 202.48
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.1138999999999997
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 5
  episodes_total: 3287
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.192580050482703e-61
          cur_lr: 5.000000000000001e-05
          entropy: 0.20113034463591045
          entropy_coeff: 0.009999999999999998
          kl: 0.003994468182452734
          policy_loss: -0.12355042720834414
          total_loss: 1700214060555.3777
          vf_explained_var: -0.4659564197063446
          vf_loss: 1700214060555.3777
    num_agent_steps_sampled: 1257000
    num_agent_steps_trained: 1257000
    num_steps_sampled: 1257000
    num_steps_trained: 1257

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1257,38080.8,1257000,-2.1139,-1.71,-7.07,202.48




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1258000
  custom_metrics: {}
  date: 2021-10-24_06-35-49
  done: false
  episode_len_mean: 202.77
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.1563999999999997
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 5
  episodes_total: 3292
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.962900252413515e-62
          cur_lr: 5.000000000000001e-05
          entropy: 0.6943349848190944
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.05792586811714702
          total_loss: 2544324332748.8
          vf_explained_var: -0.6252523064613342
          vf_loss: 2916975103180.8
    num_agent_steps_sampled: 1258000
    num_agent_steps_trained: 1258000
    num_steps_sampled: 1258000
    num_steps_trained: 1258000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1258,38130.4,1258000,-2.1564,-1.71,-7.07,202.77


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1259000
  custom_metrics: {}
  date: 2021-10-24_06-36-37
  done: false
  episode_len_mean: 203.09
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.1595999999999997
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 4
  episodes_total: 3296
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.944350378620272e-62
          cur_lr: 5.000000000000001e-05
          entropy: 0.1867633420560095
          entropy_coeff: 0.009999999999999998
          kl: 0.00365365451485281
          policy_loss: -0.10553650682171185
          total_loss: 417106873912.8889
          vf_explained_var: -0.5280876755714417
          vf_loss: 417106873912.8889
    num_agent_steps_sampled: 1259000
    num_agent_steps_trained: 1259000
    num_steps_sampled: 1259000
    num_steps_trained: 1259000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1259,38177.8,1259000,-2.1596,-1.71,-7.07,203.09




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1260000
  custom_metrics: {}
  date: 2021-10-24_06-37-44
  done: false
  episode_len_mean: 203.05
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.159199999999999
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 5
  episodes_total: 3301
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.472175189310136e-62
          cur_lr: 5.000000000000001e-05
          entropy: 0.5682970113431414
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.001689118788474136
          total_loss: 11446312084002.133
          vf_explained_var: -0.995092511177063
          vf_loss: 8439686191877.688
    num_agent_steps_sampled: 1260000
    num_agent_steps_trained: 1260000
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1260,38245.4,1260000,-2.1592,-1.71,-7.07,203.05




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1261000
  custom_metrics: {}
  date: 2021-10-24_06-38-31
  done: false
  episode_len_mean: 203.65
  episode_media: {}
  episode_reward_max: -1.7100000000000013
  episode_reward_mean: -2.165199999999999
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 5
  episodes_total: 3306
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.708262783965204e-62
          cur_lr: 5.000000000000001e-05
          entropy: 0.3083837447067102
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.02677855326069726
          total_loss: 1670515418726.4
          vf_explained_var: -1.0
          vf_loss: 7109670082241.422
    num_agent_steps_sampled: 1261000
    num_agent_steps_trained: 1261000
    num_steps_sampled: 1261000
    num_steps_trained: 1261000
  iterations_since_restore: 1261


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1261,38292.1,1261000,-2.1652,-1.71,-7.07,203.65


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1262000
  custom_metrics: {}
  date: 2021-10-24_06-39-19
  done: false
  episode_len_mean: 204.22
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.170899999999999
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 5
  episodes_total: 3311
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0062394175947807e-61
          cur_lr: 5.000000000000001e-05
          entropy: 0.13275557425287035
          entropy_coeff: 0.009999999999999998
          kl: 0.0037990250750731826
          policy_loss: -0.017940818270047506
          total_loss: 5228777102085.688
          vf_explained_var: -1.0
          vf_loss: 5228777102085.688
    num_agent_steps_sampled: 1262000
    num_agent_steps_trained: 1262000
    num_steps_sampled: 1262000
    num_steps_trained: 1262000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1262,38339.9,1262000,-2.1709,-1.73,-7.07,204.22




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1263000
  custom_metrics: {}
  date: 2021-10-24_06-40-08
  done: false
  episode_len_mean: 204.59
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.1745999999999994
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 5
  episodes_total: 3316
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.031197087973903e-62
          cur_lr: 5.000000000000001e-05
          entropy: 0.5540871990223726
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.04754164707329538
          total_loss: 750352924672.0
          vf_explained_var: -0.7693288326263428
          vf_loss: 1119864677990.4
    num_agent_steps_sampled: 1263000
    num_agent_steps_trained: 1263000
    num_steps_sampled: 1263000
    num_steps_trained: 1263000
  iterations_since_re

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1263,38388.5,1263000,-2.1746,-1.73,-7.07,204.59




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1264000
  custom_metrics: {}
  date: 2021-10-24_06-40-54
  done: false
  episode_len_mean: 205.23
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.1809999999999987
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 5
  episodes_total: 3321
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.546795631960854e-62
          cur_lr: 5.000000000000001e-05
          entropy: 0.6584233807192909
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.008137833409839207
          total_loss: 7908245250594.134
          vf_explained_var: -0.438002347946167
          vf_loss: 5490951171458.845
    num_agent_steps_sampled: 1264000
    num_agent_steps_trained: 1264000
    num_steps_sampled: 1264000
    num_steps_trained: 1264000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1264,38434.5,1264000,-2.181,-1.73,-7.07,205.23




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1265000
  custom_metrics: {}
  date: 2021-10-24_06-41-41
  done: false
  episode_len_mean: 205.86
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.1872999999999987
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 4
  episodes_total: 3325
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1320193447941285e-61
          cur_lr: 5.000000000000001e-05
          entropy: 0.5609916930397352
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.035812186863687305
          total_loss: .nan
          vf_explained_var: -1.0
          vf_loss: 2796034805668.9775
    num_agent_steps_sampled: 1265000
    num_agent_steps_trained: 1265000
    num_steps_sampled: 1265000
    num_steps_trained: 1265000
  iterations_since_restore: 1265
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1265,38481.4,1265000,-2.1873,-1.73,-7.07,205.86




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1266000
  custom_metrics: {}
  date: 2021-10-24_06-42-44
  done: false
  episode_len_mean: 206.27
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.191399999999999
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 5
  episodes_total: 3330
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6980290171911925e-61
          cur_lr: 5.000000000000001e-05
          entropy: 0.8382409784528945
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.010411757561895583
          total_loss: 25757447072426.668
          vf_explained_var: -0.3978731036186218
          vf_loss: 19895841645454.223
    num_agent_steps_sampled: 1266000
    num_agent_steps_trained: 1266000
    num_steps_sampled: 1266000
    num_steps_trained: 1266000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1266,38544.6,1266000,-2.1914,-1.73,-7.07,206.27




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1267000
  custom_metrics: {}
  date: 2021-10-24_06-43-33
  done: false
  episode_len_mean: 206.94
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.1980999999999984
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 5
  episodes_total: 3335
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.547043525786788e-61
          cur_lr: 5.000000000000001e-05
          entropy: 0.7783799144956801
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.08111947468585438
          total_loss: 3211119021260.8
          vf_explained_var: 0.002790462225675583
          vf_loss: 3204856452073.2446
    num_agent_steps_sampled: 1267000
    num_agent_steps_trained: 1267000
    num_steps_sampled: 1267000
    num_steps_trained: 1267000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1267,38593.6,1267000,-2.1981,-1.73,-7.07,206.94




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1268000
  custom_metrics: {}
  date: 2021-10-24_06-44-18
  done: false
  episode_len_mean: 207.56
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -2.204299999999998
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 4
  episodes_total: 3339
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.8205652886801825e-61
          cur_lr: 5.000000000000001e-05
          entropy: 0.4458116930392053
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.005823938796917598
          total_loss: 4330037855300.2666
          vf_explained_var: -1.0
          vf_loss: 3426510270190.933
    num_agent_steps_sampled: 1268000
    num_agent_steps_trained: 1268000
    num_steps_sampled: 1268000
    num_steps_trained: 1268000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1268,38639.1,1268000,-2.2043,-1.73,-7.07,207.56




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1269000
  custom_metrics: {}
  date: 2021-10-24_06-45-04
  done: false
  episode_len_mean: 208.1
  episode_media: {}
  episode_reward_max: -1.7600000000000013
  episode_reward_mean: -2.2096999999999984
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 5
  episodes_total: 3344
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.730847933020274e-61
          cur_lr: 5.000000000000001e-05
          entropy: 0.626882736881574
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0526876870646245
          total_loss: 292718917495.4667
          vf_explained_var: -0.5341550707817078
          vf_loss: 8229602277967.645
    num_agent_steps_sampled: 1269000
    num_agent_steps_trained: 1269000
    num_steps_sampled: 1269000
    num_steps_trained: 1269000
  iterations_since

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1269,38684.4,1269000,-2.2097,-1.76,-7.07,208.1


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1270000
  custom_metrics: {}
  date: 2021-10-24_06-45-55
  done: false
  episode_len_mean: 208.25
  episode_media: {}
  episode_reward_max: -1.7600000000000013
  episode_reward_mean: -2.2111999999999985
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 5
  episodes_total: 3349
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.596271899530409e-61
          cur_lr: 5.000000000000001e-05
          entropy: 0.937128910753462
          entropy_coeff: 0.009999999999999998
          kl: 0.9253485858440399
          policy_loss: -0.015221794984406895
          total_loss: 6414260142808.178
          vf_explained_var: -0.5499885678291321
          vf_loss: 6414260142808.178
    num_agent_steps_sampled: 1270000
    num_agent_steps_trained: 1270000
    num_steps_sampled: 1270000
    num_steps_trained: 1270000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1270,38735.9,1270000,-2.2112,-1.76,-7.07,208.25




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1271000
  custom_metrics: {}
  date: 2021-10-24_06-46-46
  done: false
  episode_len_mean: 208.6
  episode_media: {}
  episode_reward_max: -1.7600000000000013
  episode_reward_mean: -2.214699999999998
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 4
  episodes_total: 3353
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.289440784929562e-60
          cur_lr: 5.000000000000001e-05
          entropy: 0.8705355882644653
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.05472058587604099
          total_loss: 2301586020761.6
          vf_explained_var: 0.5978437662124634
          vf_loss: 1942140013772.8
    num_agent_steps_sampled: 1271000
    num_agent_steps_trained: 1271000
    num_steps_sampled: 1271000
    num_steps_trained: 1271000
  iterations_since_rest

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1271,38787,1271000,-2.2147,-1.76,-7.07,208.6




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1272000
  custom_metrics: {}
  date: 2021-10-24_06-47-33
  done: false
  episode_len_mean: 209.18
  episode_media: {}
  episode_reward_max: -1.7600000000000013
  episode_reward_mean: -2.2204999999999977
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 5
  episodes_total: 3358
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9341611773943428e-60
          cur_lr: 5.000000000000001e-05
          entropy: 0.633242529630661
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.04754614796903398
          total_loss: 1105938184055.4666
          vf_explained_var: -0.776257336139679
          vf_loss: 1393281227525.689
    num_agent_steps_sampled: 1272000
    num_agent_steps_trained: 1272000
    num_steps_sampled: 1272000
    num_steps_trained: 1272000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1272,38833.9,1272000,-2.2205,-1.76,-7.07,209.18




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1273000
  custom_metrics: {}
  date: 2021-10-24_06-48-38
  done: false
  episode_len_mean: 209.37
  episode_media: {}
  episode_reward_max: -1.7600000000000013
  episode_reward_mean: -2.222399999999998
  episode_reward_min: -7.06999999999996
  episodes_this_iter: 4
  episodes_total: 3362
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.9012417660915144e-60
          cur_lr: 5.000000000000001e-05
          entropy: 0.6128413124216927
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.06809593281812137
          total_loss: 6852357739861.333
          vf_explained_var: -0.6613916754722595
          vf_loss: 6856811007180.8
    num_agent_steps_sampled: 1273000
    num_agent_steps_trained: 1273000
    num_steps_sampled: 1273000
    num_steps_trained: 1273000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1273,38898.4,1273000,-2.2224,-1.76,-7.07,209.37


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1274000
  custom_metrics: {}
  date: 2021-10-24_06-49-26
  done: false
  episode_len_mean: 209.92
  episode_media: {}
  episode_reward_max: -1.7600000000000013
  episode_reward_mean: -2.2773999999999974
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3367
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.3518626491372727e-60
          cur_lr: 5.000000000000001e-05
          entropy: 0.6256807622085843
          entropy_coeff: 0.009999999999999998
          kl: 0.5356884042575326
          policy_loss: 0.08663396992617184
          total_loss: 761860192483.5555
          vf_explained_var: -0.2115795761346817
          vf_loss: 761860192483.5555
    num_agent_steps_sampled: 1274000
    num_agent_steps_trained: 1274000
    num_steps_sampled: 1274000
    num_steps_trained: 1274000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1274,38947.2,1274000,-2.2774,-1.76,-7.55,209.92




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1275000
  custom_metrics: {}
  date: 2021-10-24_06-50-13
  done: false
  episode_len_mean: 210.54
  episode_media: {}
  episode_reward_max: -1.8000000000000014
  episode_reward_mean: -2.243999999999998
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3372
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.527793973705907e-60
          cur_lr: 5.000000000000001e-05
          entropy: 0.5355443212721083
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.12486663688388136
          total_loss: 3051825188044.8
          vf_explained_var: -0.7172335982322693
          vf_loss: 4138853017554.489
    num_agent_steps_sampled: 1275000
    num_agent_steps_trained: 1275000
    num_steps_sampled: 1275000
    num_steps_trained: 1275000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1275,38994,1275000,-2.244,-1.8,-7.55,210.54




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1276000
  custom_metrics: {}
  date: 2021-10-24_06-51-00
  done: false
  episode_len_mean: 210.91
  episode_media: {}
  episode_reward_max: -1.8000000000000014
  episode_reward_mean: -2.1981999999999986
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3377
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.791690960558864e-60
          cur_lr: 5.000000000000001e-05
          entropy: 0.9648991756969028
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.015876166108581755
          total_loss: 4363714081587.2
          vf_explained_var: -0.7824152708053589
          vf_loss: 4492131200022.756
    num_agent_steps_sampled: 1276000
    num_agent_steps_trained: 1276000
    num_steps_sampled: 1276000
    num_steps_trained: 1276000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1276,39040.8,1276000,-2.1982,-1.8,-7.55,210.91




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1277000
  custom_metrics: {}
  date: 2021-10-24_06-51-45
  done: false
  episode_len_mean: 211.48
  episode_media: {}
  episode_reward_max: -1.8000000000000014
  episode_reward_mean: -2.203899999999998
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 4
  episodes_total: 3381
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4687536440838288e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.8810977465576596
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.19642764702439308
          total_loss: 1274995404526.9333
          vf_explained_var: -0.5230258703231812
          vf_loss: 3307056321149.156
    num_agent_steps_sampled: 1277000
    num_agent_steps_trained: 1277000
    num_steps_sampled: 1277000
    num_steps_trained: 1277000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1277,39085.7,1277000,-2.2039,-1.8,-7.55,211.48




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1278000
  custom_metrics: {}
  date: 2021-10-24_06-52-31
  done: false
  episode_len_mean: 211.76
  episode_media: {}
  episode_reward_max: -1.8000000000000014
  episode_reward_mean: -2.2066999999999988
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3386
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.203130466125744e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.45682343078984156
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0181727581553989
          total_loss: 967173109623.4667
          vf_explained_var: -0.43977710604667664
          vf_loss: 2243184638179.5557
    num_agent_steps_sampled: 1278000
    num_agent_steps_trained: 1278000
    num_steps_sampled: 1278000
    num_steps_trained: 1278000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1278,39131.2,1278000,-2.2067,-1.8,-7.55,211.76




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1279000
  custom_metrics: {}
  date: 2021-10-24_06-53-34
  done: false
  episode_len_mean: 211.99
  episode_media: {}
  episode_reward_max: -1.8000000000000014
  episode_reward_mean: -2.1693999999999987
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3391
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.3046956991886166e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.40206541799836687
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.13323122395409479
          total_loss: 1483785645806.9333
          vf_explained_var: -0.478255033493042
          vf_loss: 1113571080419.5557
    num_agent_steps_sampled: 1279000
    num_agent_steps_trained: 1279000
    num_steps_sampled: 1279000
    num_steps_trained: 1279000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1279,39194.9,1279000,-2.1694,-1.8,-7.55,211.99


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1280000
  custom_metrics: {}
  date: 2021-10-24_06-54-22
  done: false
  episode_len_mean: 212.18
  episode_media: {}
  episode_reward_max: -1.8000000000000014
  episode_reward_mean: -2.1712999999999987
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 4
  episodes_total: 3395
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.9570435487829245e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.6961526061097781
          entropy_coeff: 0.009999999999999998
          kl: 0.6095955845137355
          policy_loss: -0.09137583134902848
          total_loss: 3796541595830.0444
          vf_explained_var: 0.5430670976638794
          vf_loss: 3796541595830.0444
    num_agent_steps_sampled: 1280000
    num_agent_steps_trained: 1280000
    num_steps_sampled: 1280000
    num_steps_trained: 1280000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1280,39242.2,1280000,-2.1713,-1.8,-7.55,212.18




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1281000
  custom_metrics: {}
  date: 2021-10-24_06-55-09
  done: false
  episode_len_mean: 213.0
  episode_media: {}
  episode_reward_max: -1.8600000000000014
  episode_reward_mean: -2.1794999999999987
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3400
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.435565323174385e-59
          cur_lr: 5.000000000000001e-05
          entropy: 0.8822597076495489
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.08215565101967917
          total_loss: 49077011524.26667
          vf_explained_var: -0.6251787543296814
          vf_loss: 3357950191570.489
    num_agent_steps_sampled: 1281000
    num_agent_steps_trained: 1281000
    num_steps_sampled: 1281000
    num_steps_trained: 1281000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1281,39289.2,1281000,-2.1795,-1.86,-7.55,213




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1282000
  custom_metrics: {}
  date: 2021-10-24_06-55-56
  done: false
  episode_len_mean: 212.96
  episode_media: {}
  episode_reward_max: -1.8600000000000014
  episode_reward_mean: -2.1790999999999987
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3405
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.1153347984761579e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.8674648612737655
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.07426073898871739
          total_loss: 5346219834299.733
          vf_explained_var: -0.5031709671020508
          vf_loss: 5916698610528.711
    num_agent_steps_sampled: 1282000
    num_agent_steps_trained: 1282000
    num_steps_sampled: 1282000
    num_steps_trained: 1282000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1282,39336.4,1282000,-2.1791,-1.86,-7.55,212.96




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1283000
  custom_metrics: {}
  date: 2021-10-24_06-56-43
  done: false
  episode_len_mean: 212.95
  episode_media: {}
  episode_reward_max: -1.8600000000000014
  episode_reward_mean: -2.178999999999998
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3410
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.673002197714237e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.836363877190484
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.04294158352745904
          total_loss: 1611135267498.6667
          vf_explained_var: 0.5909130573272705
          vf_loss: 1605371688004.2666
    num_agent_steps_sampled: 1283000
    num_agent_steps_trained: 1283000
    num_steps_sampled: 1283000
    num_steps_trained: 1283000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1283,39383.5,1283000,-2.179,-1.86,-7.55,212.95




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1284000
  custom_metrics: {}
  date: 2021-10-24_06-57-31
  done: false
  episode_len_mean: 213.13
  episode_media: {}
  episode_reward_max: -1.8600000000000014
  episode_reward_mean: -2.1807999999999983
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 4
  episodes_total: 3414
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.509503296571356e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.6530427570144336
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.03444100610084004
          total_loss: 1574556886630.4
          vf_explained_var: -0.5603794455528259
          vf_loss: 1250430970538.6667
    num_agent_steps_sampled: 1284000
    num_agent_steps_trained: 1284000
    num_steps_sampled: 1284000
    num_steps_trained: 1284000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1284,39431.2,1284000,-2.1808,-1.86,-7.55,213.13




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1285000
  custom_metrics: {}
  date: 2021-10-24_06-58-20
  done: false
  episode_len_mean: 213.05
  episode_media: {}
  episode_reward_max: -1.8600000000000014
  episode_reward_mean: -2.179999999999999
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3419
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.7642549448570336e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.9302643141812749
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.06297266350852118
          total_loss: 1437188777574.4
          vf_explained_var: -0.15757790207862854
          vf_loss: 9569160006496.71
    num_agent_steps_sampled: 1285000
    num_agent_steps_trained: 1285000
    num_steps_sampled: 1285000
    num_steps_trained: 1285000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1285,39480.3,1285000,-2.18,-1.86,-7.55,213.05




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1286000
  custom_metrics: {}
  date: 2021-10-24_06-59-30
  done: false
  episode_len_mean: 212.35
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.172999999999999
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3424
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.646382417285551e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.20445163630776936
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.09477232992649079
          total_loss: 706635338956.8
          vf_explained_var: -0.28968575596809387
          vf_loss: 841209389238.0444
    num_agent_steps_sampled: 1286000
    num_agent_steps_trained: 1286000
    num_steps_sampled: 1286000
    num_steps_trained: 1286000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1286,39549.9,1286000,-2.173,-1.75,-7.55,212.35




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1287000
  custom_metrics: {}
  date: 2021-10-24_07-00-16
  done: false
  episode_len_mean: 212.47
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.1741999999999986
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3429
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.469573625928324e-58
          cur_lr: 5.000000000000001e-05
          entropy: 0.6633575293752882
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.058206106887923346
          total_loss: 759750280260.2667
          vf_explained_var: -0.2688651382923126
          vf_loss: 782792898059.3778
    num_agent_steps_sampled: 1287000
    num_agent_steps_trained: 1287000
    num_steps_sampled: 1287000
    num_steps_trained: 1287000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1287,39596.5,1287000,-2.1742,-1.75,-7.55,212.47




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1288000
  custom_metrics: {}
  date: 2021-10-24_07-01-02
  done: false
  episode_len_mean: 213.33
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.1827999999999985
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 4
  episodes_total: 3433
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2704360438892484e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.9126549979050954
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.045357082618607415
          total_loss: 1378451316189.8667
          vf_explained_var: -0.5453599095344543
          vf_loss: 3987046729136.3555
    num_agent_steps_sampled: 1288000
    num_agent_steps_trained: 1288000
    num_steps_sampled: 1288000
    num_steps_trained: 1288000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1288,39642.1,1288000,-2.1828,-1.75,-7.55,213.33




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1289000
  custom_metrics: {}
  date: 2021-10-24_07-01-48
  done: false
  episode_len_mean: 213.93
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.1881999999999975
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3438
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.905654065833873e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.5930917842520608
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.035165239539411336
          total_loss: 2873185900953.6
          vf_explained_var: -0.37789925932884216
          vf_loss: 2062887689329.7778
    num_agent_steps_sampled: 1289000
    num_agent_steps_trained: 1289000
    num_steps_sampled: 1289000
    num_steps_trained: 1289000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1289,39688.3,1289000,-2.1882,-1.75,-7.55,213.93




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1290000
  custom_metrics: {}
  date: 2021-10-24_07-02-35
  done: false
  episode_len_mean: 213.45
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.183399999999998
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 4
  episodes_total: 3442
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8584810987508097e-57
          cur_lr: 5.000000000000001e-05
          entropy: 1.0199182046784294
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.060446296715074115
          total_loss: 1684566308727.4666
          vf_explained_var: 0.15178751945495605
          vf_loss: 1256677023926.0444
    num_agent_steps_sampled: 1290000
    num_agent_steps_trained: 1290000
    num_steps_sampled: 1290000
    num_steps_trained: 1290000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1290,39735.3,1290000,-2.1834,-1.75,-7.55,213.45




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1291000
  custom_metrics: {}
  date: 2021-10-24_07-03-22
  done: false
  episode_len_mean: 213.12
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.199599999999998
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3447
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.287721648126215e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.36568697674406897
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.03551593903038237
          total_loss: 425822935449.6
          vf_explained_var: -0.5042353272438049
          vf_loss: 924118828828.4445
    num_agent_steps_sampled: 1291000
    num_agent_steps_trained: 1291000
    num_steps_sampled: 1291000
    num_steps_trained: 1291000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1291,39782.5,1291000,-2.1996,-1.75,-7.55,213.12




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1292000
  custom_metrics: {}
  date: 2021-10-24_07-04-27
  done: false
  episode_len_mean: 213.33
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.2511999999999976
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3452
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.431582472189322e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.5186446046249734
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.02840584566195806
          total_loss: 1276644036881.0667
          vf_explained_var: -0.9096758365631104
          vf_loss: 1227501911699.9111
    num_agent_steps_sampled: 1292000
    num_agent_steps_trained: 1292000
    num_steps_sampled: 1292000
    num_steps_trained: 1292000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1292,39847.3,1292000,-2.2512,-1.75,-7.55,213.33


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1293000
  custom_metrics: {}
  date: 2021-10-24_07-05-15
  done: false
  episode_len_mean: 212.78
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.2456999999999976
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3457
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.647373708283979e-57
          cur_lr: 5.000000000000001e-05
          entropy: 0.3789026609311501
          entropy_coeff: 0.009999999999999998
          kl: 0.26578219976771733
          policy_loss: 0.06037198876341184
          total_loss: 528952583418.3111
          vf_explained_var: -0.48781806230545044
          vf_loss: 528952583418.3111
    num_agent_steps_sampled: 1293000
    num_agent_steps_trained: 1293000
    num_steps_sampled: 1293000
    num_steps_trained: 1293000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1293,39895.2,1293000,-2.2457,-1.75,-7.55,212.78


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1294000
  custom_metrics: {}
  date: 2021-10-24_07-06-05
  done: false
  episode_len_mean: 212.5
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.2428999999999983
  episode_reward_min: -7.54999999999999
  episodes_this_iter: 5
  episodes_total: 3462
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.447106056242597e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.0786312641782893
          entropy_coeff: 0.009999999999999998
          kl: 0.007397035674158461
          policy_loss: -0.08577362125118573
          total_loss: 3433206717553.778
          vf_explained_var: -1.0
          vf_loss: 3433206717553.778
    num_agent_steps_sampled: 1294000
    num_agent_steps_trained: 1294000
    num_steps_sampled: 1294000
    num_steps_trained: 1294000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1294,39944.7,1294000,-2.2429,-1.75,-7.55,212.5




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1295000
  custom_metrics: {}
  date: 2021-10-24_07-06-50
  done: false
  episode_len_mean: 212.47
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.1930999999999976
  episode_reward_min: -7.549999999999963
  episodes_this_iter: 4
  episodes_total: 3466
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.447106056242597e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.5881256575385729
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.1442562868197759
          total_loss: 237803835927210.66
          vf_explained_var: 0.3461877405643463
          vf_loss: 158577095283871.28
    num_agent_steps_sampled: 1295000
    num_agent_steps_trained: 1295000
    num_steps_sampled: 1295000
    num_steps_trained: 1295000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1295,39989.7,1295000,-2.1931,-1.75,-7.55,212.47




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1296000
  custom_metrics: {}
  date: 2021-10-24_07-07-38
  done: false
  episode_len_mean: 213.0
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.1983999999999977
  episode_reward_min: -7.549999999999963
  episodes_this_iter: 5
  episodes_total: 3471
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1706590843638955e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.6961980786588456
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.1618916650613149
          total_loss: 479440852309.3333
          vf_explained_var: -0.8507331609725952
          vf_loss: 1023895392893.1555
    num_agent_steps_sampled: 1296000
    num_agent_steps_trained: 1296000
    num_steps_sampled: 1296000
    num_steps_trained: 1296000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1296,40038.4,1296000,-2.1984,-1.75,-7.55,213




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1297000
  custom_metrics: {}
  date: 2021-10-24_07-08-26
  done: false
  episode_len_mean: 212.88
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.197199999999998
  episode_reward_min: -7.549999999999963
  episodes_this_iter: 4
  episodes_total: 3475
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2559886265458434e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.4503396037552092
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.014894639286730025
          total_loss: 706152381371.7333
          vf_explained_var: 0.028019476681947708
          vf_loss: 720382558936.1777
    num_agent_steps_sampled: 1297000
    num_agent_steps_trained: 1297000
    num_steps_sampled: 1297000
    num_steps_trained: 1297000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1297,40085.7,1297000,-2.1972,-1.75,-7.55,212.88




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1298000
  custom_metrics: {}
  date: 2021-10-24_07-09-30
  done: false
  episode_len_mean: 212.3
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.2705999999999973
  episode_reward_min: -7.549999999999963
  episodes_this_iter: 5
  episodes_total: 3480
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.883982939818765e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.7342479361428155
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.030197331143750086
          total_loss: .nan
          vf_explained_var: -0.4920835793018341
          vf_loss: 3393365411248.3555
    num_agent_steps_sampled: 1298000
    num_agent_steps_trained: 1298000
    num_steps_sampled: 1298000
    num_steps_trained: 1298000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1298,40150.3,1298000,-2.2706,-1.75,-7.55,212.3




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1299000
  custom_metrics: {}
  date: 2021-10-24_07-10-20
  done: false
  episode_len_mean: 212.33
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.310499999999997
  episode_reward_min: -7.549999999999963
  episodes_this_iter: 5
  episodes_total: 3485
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.325974409728148e-56
          cur_lr: 5.000000000000001e-05
          entropy: 0.8079326384597354
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.07790328073832724
          total_loss: 5519598004906.667
          vf_explained_var: -0.5558095574378967
          vf_loss: 4238222846998.7554
    num_agent_steps_sampled: 1299000
    num_agent_steps_trained: 1299000
    num_steps_sampled: 1299000
    num_steps_trained: 1299000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1299,40199.5,1299000,-2.3105,-1.75,-7.55,212.33




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1300000
  custom_metrics: {}
  date: 2021-10-24_07-11-08
  done: false
  episode_len_mean: 212.45
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.450299999999997
  episode_reward_min: -7.549999999999963
  episodes_this_iter: 4
  episodes_total: 3489
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0988961614592221e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.9455726272530026
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.05488181528117922
          total_loss: 9446112030446.934
          vf_explained_var: -0.7287166118621826
          vf_loss: 7465060079297.422
    num_agent_steps_sampled: 1300000
    num_agent_steps_trained: 1300000
    num_steps_sampled: 1300000
    num_steps_trained: 1300000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1300,40247.6,1300000,-2.4503,-1.75,-7.55,212.45




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1301000
  custom_metrics: {}
  date: 2021-10-24_07-11-53
  done: false
  episode_len_mean: 212.89
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.563599999999996
  episode_reward_min: -8.179999999999964
  episodes_this_iter: 5
  episodes_total: 3494
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6483442421888332e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.6160937925179799
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.11028356129924456
          total_loss: 1303409232008.5334
          vf_explained_var: -1.0
          vf_loss: 1907159278114.1333
    num_agent_steps_sampled: 1301000
    num_agent_steps_trained: 1301000
    num_steps_sampled: 1301000
    num_steps_trained: 1301000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1301,40292.7,1301000,-2.5636,-1.75,-8.18,212.89




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1302000
  custom_metrics: {}
  date: 2021-10-24_07-12-39
  done: false
  episode_len_mean: 212.78
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.562199999999996
  episode_reward_min: -8.179999999999964
  episodes_this_iter: 5
  episodes_total: 3499
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4725163632832495e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.8546651211049822
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.05413342515627543
          total_loss: 2173645326472.5334
          vf_explained_var: -0.4266483187675476
          vf_loss: 2147090941360.3555
    num_agent_steps_sampled: 1302000
    num_agent_steps_trained: 1302000
    num_steps_sampled: 1302000
    num_steps_trained: 1302000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1302,40339.5,1302000,-2.5622,-1.75,-8.18,212.78




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1303000
  custom_metrics: {}
  date: 2021-10-24_07-13-25
  done: false
  episode_len_mean: 213.1
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.7032999999999943
  episode_reward_min: -9.36999999999997
  episodes_this_iter: 4
  episodes_total: 3503
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.708774544924873e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.8740115688906775
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.004651282727718353
          total_loss: 1763521221973.3333
          vf_explained_var: -0.43478623032569885
          vf_loss: 1755310223815.111
    num_agent_steps_sampled: 1303000
    num_agent_steps_trained: 1303000
    num_steps_sampled: 1303000
    num_steps_trained: 1303000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1303,40385.3,1303000,-2.7033,-1.75,-9.37,213.1




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1304000
  custom_metrics: {}
  date: 2021-10-24_07-14-13
  done: false
  episode_len_mean: 213.69
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.857699999999994
  episode_reward_min: -9.36999999999997
  episodes_this_iter: 5
  episodes_total: 3508
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.563161817387311e-55
          cur_lr: 5.000000000000001e-05
          entropy: 0.892575646440188
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.06190960920519299
          total_loss: 4075606217523.2
          vf_explained_var: -0.4061228334903717
          vf_loss: 3352281299535.6445
    num_agent_steps_sampled: 1304000
    num_agent_steps_trained: 1304000
    num_steps_sampled: 1304000
    num_steps_trained: 1304000
  iterations_since_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1304,40433,1304000,-2.8577,-1.75,-9.37,213.69




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1305000
  custom_metrics: {}
  date: 2021-10-24_07-15-18
  done: false
  episode_len_mean: 214.39
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -2.983199999999993
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3512
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.344742726080968e-55
          cur_lr: 5.000000000000001e-05
          entropy: 1.1245128293832143
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.09824065512253179
          total_loss: 3065403722956.8
          vf_explained_var: -0.2891116738319397
          vf_loss: 1387597868418.8445
    num_agent_steps_sampled: 1305000
    num_agent_steps_trained: 1305000
    num_steps_sampled: 1305000
    num_steps_trained: 1305000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1305,40498.2,1305000,-2.9832,-1.75,-9.49,214.39




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1306000
  custom_metrics: {}
  date: 2021-10-24_07-15-58
  done: false
  episode_len_mean: 215.73
  episode_media: {}
  episode_reward_max: -1.7500000000000013
  episode_reward_mean: -3.0949999999999926
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3516
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2517114089121457e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.8007875468995836
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.19924777663416332
          total_loss: .nan
          vf_explained_var: -0.6723575592041016
          vf_loss: 1434462564078.9333
    num_agent_steps_sampled: 1306000
    num_agent_steps_trained: 1306000
    num_steps_sampled: 1306000
    num_steps_trained: 1306000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1306,40538.2,1306000,-3.095,-1.75,-9.49,215.73




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1307000
  custom_metrics: {}
  date: 2021-10-24_07-16-48
  done: false
  episode_len_mean: 216.88
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.2054999999999922
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 5
  episodes_total: 3521
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.877567113368218e-54
          cur_lr: 5.000000000000001e-05
          entropy: 1.0844661864969465
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.04789993895424737
          total_loss: 2753503465745.067
          vf_explained_var: -0.7459008097648621
          vf_loss: 2282869132856.8887
    num_agent_steps_sampled: 1307000
    num_agent_steps_trained: 1307000
    num_steps_sampled: 1307000
    num_steps_trained: 1307000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1307,40587.5,1307000,-3.2055,-2,-9.49,216.88




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1308000
  custom_metrics: {}
  date: 2021-10-24_07-17-31
  done: false
  episode_len_mean: 217.91
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.2157999999999913
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3525
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.8163506700523275e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.6920242369174957
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.20539408599336942
          total_loss: .nan
          vf_explained_var: 0.17661884427070618
          vf_loss: 456311978575.6445
    num_agent_steps_sampled: 1308000
    num_agent_steps_trained: 1308000
    num_steps_sampled: 1308000
    num_steps_trained: 1308000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1308,40630.4,1308000,-3.2158,-2,-9.49,217.91


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1309000
  custom_metrics: {}
  date: 2021-10-24_07-18-16
  done: false
  episode_len_mean: 217.96
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.2162999999999915
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 5
  episodes_total: 3530
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.22452600507849e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.5273884766631656
          entropy_coeff: 0.009999999999999998
          kl: 0.1397206968669751
          policy_loss: -0.0010227356726924578
          total_loss: 967262622424.1777
          vf_explained_var: -0.03555953875184059
          vf_loss: 967262622424.1777
    num_agent_steps_sampled: 1309000
    num_agent_steps_trained: 1309000
    num_steps_sampled: 1309000
    num_steps_trained: 130900

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1309,40675.4,1309000,-3.2163,-2,-9.49,217.96




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1310000
  custom_metrics: {}
  date: 2021-10-24_07-18-58
  done: false
  episode_len_mean: 218.37
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.220399999999992
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3534
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.336789007617735e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.5806995029250781
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0026413035889466603
          total_loss: 1171767963374.9333
          vf_explained_var: -0.8739389777183533
          vf_loss: 2381577141179.7334
    num_agent_steps_sampled: 1310000
    num_agent_steps_trained: 1310000
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1310,40718.2,1310000,-3.2204,-2,-9.49,218.37




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1311000
  custom_metrics: {}
  date: 2021-10-24_07-19-42
  done: false
  episode_len_mean: 219.61
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.233399999999991
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3538
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.505183511426606e-54
          cur_lr: 5.000000000000001e-05
          entropy: 0.8001239173942142
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.07901777939663993
          total_loss: .nan
          vf_explained_var: -0.4158112406730652
          vf_loss: 682167900205.5111
    num_agent_steps_sampled: 1311000
    num_agent_steps_trained: 1311000
    num_steps_sampled: 1311000
    num_steps_trained: 1311000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1311,40761.7,1311000,-3.2334,-2,-9.49,219.61




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1312000
  custom_metrics: {}
  date: 2021-10-24_07-20-43
  done: false
  episode_len_mean: 220.7
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.244299999999991
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3542
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.425777526713991e-53
          cur_lr: 5.000000000000001e-05
          entropy: 1.0905627402994367
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.002513680524296231
          total_loss: 2531999695940.2666
          vf_explained_var: -1.0
          vf_loss: 2621293374122.6665
    num_agent_steps_sampled: 1312000
    num_agent_steps_trained: 1312000
    num_steps_sampled: 1312000
    num_steps_trained: 1312000
  iterations_since_restore: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1312,40823,1312000,-3.2443,-2,-9.49,220.7




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1313000
  custom_metrics: {}
  date: 2021-10-24_07-21-29
  done: false
  episode_len_mean: 221.27
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.230499999999991
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3546
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.1386662900709854e-53
          cur_lr: 5.000000000000001e-05
          entropy: 1.1850674549738567
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.2331047284985996
          total_loss: 4685179266662.4
          vf_explained_var: -0.07790673524141312
          vf_loss: 3405568041142.0444
    num_agent_steps_sampled: 1313000
    num_agent_steps_trained: 1313000
    num_steps_sampled: 1313000
    num_steps_trained: 1313000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1313,40868.6,1313000,-3.2305,-2,-9.49,221.27




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1314000
  custom_metrics: {}
  date: 2021-10-24_07-22-11
  done: false
  episode_len_mean: 221.64
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.184699999999991
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 5
  episodes_total: 3551
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2079994351064784e-53
          cur_lr: 5.000000000000001e-05
          entropy: 1.011004979742898
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.04290918327040143
          total_loss: 2676036132930.0645
          vf_explained_var: -0.35215264558792114
          vf_loss: 2043632001797.689
    num_agent_steps_sampled: 1314000
    num_agent_steps_trained: 1314000
    num_steps_sampled: 1314000
    num_steps_trained: 1314000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1314,40911.1,1314000,-3.1847,-2,-9.49,221.64




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1315000
  custom_metrics: {}
  date: 2021-10-24_07-23-01
  done: false
  episode_len_mean: 222.99
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.1981999999999915
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3555
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.811999152659717e-53
          cur_lr: 5.000000000000001e-05
          entropy: 1.0233960694736906
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.18626945912837983
          total_loss: .nan
          vf_explained_var: -0.3926311135292053
          vf_loss: 460060720332.8
    num_agent_steps_sampled: 1315000
    num_agent_steps_trained: 1315000
    num_steps_sampled: 1315000
    num_steps_trained: 1315000
  iterations_since_restore: 131

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1315,40960.7,1315000,-3.1982,-2,-9.49,222.99




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1316000
  custom_metrics: {}
  date: 2021-10-24_07-23-45
  done: false
  episode_len_mean: 224.05
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.2087999999999908
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 5
  episodes_total: 3560
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.217998728989574e-53
          cur_lr: 5.000000000000001e-05
          entropy: 1.0110902700159285
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.0637870717379782
          total_loss: 1381799567906.1333
          vf_explained_var: 0.5790479779243469
          vf_loss: 1343394775768.1777
    num_agent_steps_sampled: 1316000
    num_agent_steps_trained: 1316000
    num_steps_sampled: 1316000
    num_steps_trained: 1316000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1316,41005.1,1316000,-3.2088,-2,-9.49,224.05




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1317000
  custom_metrics: {}
  date: 2021-10-24_07-24-27
  done: false
  episode_len_mean: 224.79
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.2121999999999904
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3564
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0826998093484364e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.6767273975743188
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.11631216870413887
          total_loss: 3276634796851.2
          vf_explained_var: -1.0
          vf_loss: 2277069058503.1113
    num_agent_steps_sampled: 1317000
    num_agent_steps_trained: 1317000
    num_steps_sampled: 1317000
    num_steps_trained: 1317000
  iterations_since_restore: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1317,41046.3,1317000,-3.2122,-2,-9.49,224.79




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1318000
  custom_metrics: {}
  date: 2021-10-24_07-25-10
  done: false
  episode_len_mean: 226.75
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.23179999999999
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3568
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6240497140226542e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.7977635774347517
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.018426779533425967
          total_loss: .nan
          vf_explained_var: -1.0
          vf_loss: 2048807505464.889
    num_agent_steps_sampled: 1318000
    num_agent_steps_trained: 1318000
    num_steps_sampled: 1318000
    num_steps_trained: 1318000
  iterations_since_restore: 1318
  node_ip:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1318,41089.3,1318000,-3.2318,-2,-9.49,226.75




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1319000
  custom_metrics: {}
  date: 2021-10-24_07-26-13
  done: false
  episode_len_mean: 228.07
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.244999999999989
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3572
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4360745710339817e-52
          cur_lr: 5.000000000000001e-05
          entropy: 1.0368996507591672
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.015389788440532154
          total_loss: 2750399045085.8667
          vf_explained_var: -0.6662815809249878
          vf_loss: 2896929829683.2
    num_agent_steps_sampled: 1319000
    num_agent_steps_trained: 1319000
    num_steps_sampled: 1319000
    num_steps_trained: 1319000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1319,41152.3,1319000,-3.245,-2,-9.49,228.07




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1320000
  custom_metrics: {}
  date: 2021-10-24_07-26-55
  done: false
  episode_len_mean: 228.89
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.292799999999989
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3576
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.654111856550972e-52
          cur_lr: 5.000000000000001e-05
          entropy: 0.9074158893691169
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.0022949705521265666
          total_loss: 3956555163238.4
          vf_explained_var: -1.0
          vf_loss: 2956160081737.9556
    num_agent_steps_sampled: 1320000
    num_agent_steps_trained: 1320000
    num_steps_sampled: 1320000
    num_steps_trained: 1320000
  iterations_since_restore: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1320,41194.4,1320000,-3.2928,-2,-9.49,228.89




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1321000
  custom_metrics: {}
  date: 2021-10-24_07-27-32
  done: false
  episode_len_mean: 230.67
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.270999999999989
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 3
  episodes_total: 3579
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.4811677848264594e-52
          cur_lr: 5.000000000000001e-05
          entropy: 1.087839107380973
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03838803056213591
          total_loss: 919922513237.3334
          vf_explained_var: -0.5459114909172058
          vf_loss: 1201516688179.2
    num_agent_steps_sampled: 1321000
    num_agent_steps_trained: 1321000
    num_steps_sampled: 1321000
    num_steps_trained: 1321000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1321,41231.1,1321000,-3.271,-2,-9.49,230.67




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1322000
  custom_metrics: {}
  date: 2021-10-24_07-28-11
  done: false
  episode_len_mean: 233.79
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.222999999999989
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3583
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.221751677239687e-52
          cur_lr: 5.000000000000001e-05
          entropy: 1.4799626893467372
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.012033759802579879
          total_loss: .nan
          vf_explained_var: -1.0
          vf_loss: 1286944842638.2222
    num_agent_steps_sampled: 1322000
    num_agent_steps_trained: 1322000
    num_steps_sampled: 1322000
    num_steps_trained: 1322000
  iterations_since_restore: 1322
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1322,41270.2,1322000,-3.223,-2,-9.49,233.79




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1323000
  custom_metrics: {}
  date: 2021-10-24_07-28-50
  done: false
  episode_len_mean: 234.61
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.1420999999999895
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3587
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.2332627515859531e-51
          cur_lr: 5.000000000000001e-05
          entropy: 1.573375380701489
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.0554729946785503
          total_loss: 715786187114.6666
          vf_explained_var: -0.9439558386802673
          vf_loss: 1070516040558.9333
    num_agent_steps_sampled: 1323000
    num_agent_steps_trained: 1323000
    num_steps_sampled: 1323000
    num_steps_trained: 1323000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1323,41309.5,1323000,-3.1421,-2,-9.49,234.61




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1324000
  custom_metrics: {}
  date: 2021-10-24_07-29-32
  done: false
  episode_len_mean: 235.49
  episode_media: {}
  episode_reward_max: -2.0000000000000013
  episode_reward_mean: -3.0518999999999896
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3591
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.84989412737893e-51
          cur_lr: 5.000000000000001e-05
          entropy: 1.2419730385144552
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.02175847300224834
          total_loss: 2570912706560.0
          vf_explained_var: -0.4592170715332031
          vf_loss: 3541637823783.8223
    num_agent_steps_sampled: 1324000
    num_agent_steps_trained: 1324000
    num_steps_sampled: 1324000
    num_steps_trained: 1324000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1324,41351,1324000,-3.0519,-2,-9.49,235.49




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1325000
  custom_metrics: {}
  date: 2021-10-24_07-30-12
  done: false
  episode_len_mean: 237.76
  episode_media: {}
  episode_reward_max: -1.0700000000000007
  episode_reward_mean: -3.02459999999999
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3595
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.774841191068395e-51
          cur_lr: 5.000000000000001e-05
          entropy: 1.3270599206288656
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.002053816554447015
          total_loss: .nan
          vf_explained_var: -1.0
          vf_loss: 823030791099.7333
    num_agent_steps_sampled: 1325000
    num_agent_steps_trained: 1325000
    num_steps_sampled: 1325000
    num_steps_trained: 1325000
  iterations_since_restore: 1325
  node_ip: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1325,41391.6,1325000,-3.0246,-1.07,-9.49,237.76




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1326000
  custom_metrics: {}
  date: 2021-10-24_07-30-55
  done: false
  episode_len_mean: 239.01
  episode_media: {}
  episode_reward_max: -1.0700000000000007
  episode_reward_mean: -3.0370999999999895
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3599
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.162261786602592e-51
          cur_lr: 5.000000000000001e-05
          entropy: 1.2541078666845957
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.013974185246560309
          total_loss: 1187943872375.4666
          vf_explained_var: -1.0
          vf_loss: 885308479169.4222
    num_agent_steps_sampled: 1326000
    num_agent_steps_trained: 1326000
    num_steps_sampled: 1326000
    num_steps_trained: 1326000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1326,41434.5,1326000,-3.0371,-1.07,-9.49,239.01




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1327000
  custom_metrics: {}
  date: 2021-10-24_07-31-55
  done: false
  episode_len_mean: 239.77
  episode_media: {}
  episode_reward_max: -1.0700000000000007
  episode_reward_mean: -2.905599999999991
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 4
  episodes_total: 3603
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.243392679903888e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.5409941289159986
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.2575210177236133
          total_loss: .nan
          vf_explained_var: -0.33332550525665283
          vf_loss: 44511969177.6
    num_agent_steps_sampled: 1327000
    num_agent_steps_trained: 1327000
    num_steps_sampled: 1327000
    num_steps_trained: 1327000
  iterations_since_restore: 1327


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1327,41494.7,1327000,-2.9056,-1.07,-9.49,239.77


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1328000
  custom_metrics: {}
  date: 2021-10-24_07-32-45
  done: false
  episode_len_mean: 239.51
  episode_media: {}
  episode_reward_max: -1.0700000000000007
  episode_reward_mean: -2.754499999999991
  episode_reward_min: -9.489999999999974
  episodes_this_iter: 5
  episodes_total: 3608
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.365089019855832e-51
          cur_lr: 5.000000000000001e-05
          entropy: 0.4039702692586515
          entropy_coeff: 0.009999999999999998
          kl: 0.05661280809286369
          policy_loss: 0.017996727923552194
          total_loss: 71240768267514.31
          vf_explained_var: 0.0969008132815361
          vf_loss: 71240768267514.31
    num_agent_steps_sampled: 1328000
    num_agent_steps_trained: 1328000
    num_steps_sampled: 1328000
    num_steps_trained: 1328000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1328,41544,1328000,-2.7545,-1.07,-9.49,239.51




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1329000
  custom_metrics: {}
  date: 2021-10-24_07-33-29
  done: false
  episode_len_mean: 240.7
  episode_media: {}
  episode_reward_max: -1.0700000000000007
  episode_reward_mean: -2.792499999999991
  episode_reward_min: -9.19999999999997
  episodes_this_iter: 4
  episodes_total: 3612
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.404763352978375e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.7558896134297053
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.013457432472043568
          total_loss: .nan
          vf_explained_var: -0.10479606688022614
          vf_loss: 608974097794.8445
    num_agent_steps_sampled: 1329000
    num_agent_steps_trained: 1329000
    num_steps_sampled: 1329000
    num_steps_trained: 1329000
  iterations_since_restore: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1329,41588.2,1329000,-2.7925,-1.07,-9.2,240.7




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1330000
  custom_metrics: {}
  date: 2021-10-24_07-34-19
  done: false
  episode_len_mean: 240.4
  episode_media: {}
  episode_reward_max: -1.0700000000000007
  episode_reward_mean: -2.7951999999999915
  episode_reward_min: -9.19999999999997
  episodes_this_iter: 4
  episodes_total: 3616
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.107145029467562e-50
          cur_lr: 5.000000000000001e-05
          entropy: 1.0017310440540315
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.038742523392041525
          total_loss: 210449278020.26666
          vf_explained_var: -0.2885405719280243
          vf_loss: 496932680681.24445
    num_agent_steps_sampled: 1330000
    num_agent_steps_trained: 1330000
    num_steps_sampled: 1330000
    num_steps_trained: 1330000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1330,41637.9,1330000,-2.7952,-1.07,-9.2,240.4


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1331000
  custom_metrics: {}
  date: 2021-10-24_07-35-09
  done: false
  episode_len_mean: 239.8
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -2.6992999999999916
  episode_reward_min: -9.19999999999997
  episodes_this_iter: 5
  episodes_total: 3621
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.160717544201343e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.5347215508421262
          entropy_coeff: 0.009999999999999998
          kl: 0.2536943081260088
          policy_loss: -0.15851571905530162
          total_loss: 3418014165856.711
          vf_explained_var: -0.3407706022262573
          vf_loss: 3418014165856.711
    num_agent_steps_sampled: 1331000
    num_agent_steps_trained: 1331000
    num_steps_sampled: 1331000
    num_steps_trained: 1331000
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1331,41688.5,1331000,-2.6993,-0.22,-9.2,239.8




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1332000
  custom_metrics: {}
  date: 2021-10-24_07-35-56
  done: false
  episode_len_mean: 239.33
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -2.7258999999999918
  episode_reward_min: -9.19999999999997
  episodes_this_iter: 4
  episodes_total: 3625
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.741076316302015e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.9604452192783356
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.01751583930518892
          total_loss: 1880014922274.1333
          vf_explained_var: -0.5744982957839966
          vf_loss: 2006044721516.0889
    num_agent_steps_sampled: 1332000
    num_agent_steps_trained: 1332000
    num_steps_sampled: 1332000
    num_steps_trained: 1332000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1332,41735.3,1332000,-2.7259,-0.22,-9.2,239.33




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1333000
  custom_metrics: {}
  date: 2021-10-24_07-36-58
  done: false
  episode_len_mean: 239.51
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -2.7518999999999907
  episode_reward_min: -9.19999999999997
  episodes_this_iter: 5
  episodes_total: 3630
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.111614474453024e-50
          cur_lr: 5.000000000000001e-05
          entropy: 0.940625309281879
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.01567468974325392
          total_loss: 1485797615206.4
          vf_explained_var: 0.07507769018411636
          vf_loss: 937898415081.2444
    num_agent_steps_sampled: 1333000
    num_agent_steps_trained: 1333000
    num_steps_sampled: 1333000
    num_steps_trained: 1333000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1333,41796.8,1333000,-2.7519,-0.22,-9.2,239.51




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1334000
  custom_metrics: {}
  date: 2021-10-24_07-37-44
  done: false
  episode_len_mean: 239.34
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -2.847099999999991
  episode_reward_min: -9.19999999999997
  episodes_this_iter: 4
  episodes_total: 3634
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.0667421711679532e-49
          cur_lr: 5.000000000000001e-05
          entropy: 1.1700273977385627
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.03808098021480772
          total_loss: 177587096507.73334
          vf_explained_var: 0.49727460741996765
          vf_loss: 169084812219.73334
    num_agent_steps_sampled: 1334000
    num_agent_steps_trained: 1334000
    num_steps_sampled: 1334000
    num_steps_trained: 1334000
  iterations

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1334,41842.8,1334000,-2.8471,-0.22,-9.2,239.34




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1335000
  custom_metrics: {}
  date: 2021-10-24_07-38-33
  done: false
  episode_len_mean: 237.96
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -2.92239999999999
  episode_reward_min: -9.19999999999997
  episodes_this_iter: 5
  episodes_total: 3639
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6001132567519301e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.6599340110189384
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.08220940248833762
          total_loss: 227864379998.81482
          vf_explained_var: -0.674561619758606
          vf_loss: 270110195347.9111
    num_agent_steps_sampled: 1335000
    num_agent_steps_trained: 1335000
    num_steps_sampled: 1335000
    num_steps_trained: 1335000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1335,41891.9,1335000,-2.9224,-0.22,-9.2,237.96




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1336000
  custom_metrics: {}
  date: 2021-10-24_07-39-12
  done: false
  episode_len_mean: 238.42
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.0455999999999905
  episode_reward_min: -9.579999999999973
  episodes_this_iter: 4
  episodes_total: 3643
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.400169885127896e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.7459140307373471
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.11082432899210189
          total_loss: 882011423812.2667
          vf_explained_var: -0.9979109168052673
          vf_loss: 657800372041.9556
    num_agent_steps_sampled: 1336000
    num_agent_steps_trained: 1336000
    num_steps_sampled: 1336000
    num_steps_trained: 1336000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1336,41931.5,1336000,-3.0456,-0.22,-9.58,238.42




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1337000
  custom_metrics: {}
  date: 2021-10-24_07-39-56
  done: false
  episode_len_mean: 239.62
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.09189999999999
  episode_reward_min: -9.579999999999973
  episodes_this_iter: 4
  episodes_total: 3647
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.600254827691843e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.5556144194470511
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.09809638601210383
          total_loss: 1170613904998.4
          vf_explained_var: -0.20551003515720367
          vf_loss: 1002016819700.6222
    num_agent_steps_sampled: 1337000
    num_agent_steps_trained: 1337000
    num_steps_sampled: 1337000
    num_steps_trained: 1337000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1337,41975.3,1337000,-3.0919,-0.22,-9.58,239.62


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1338000
  custom_metrics: {}
  date: 2021-10-24_07-40-35
  done: false
  episode_len_mean: 240.19
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.09759999999999
  episode_reward_min: -9.579999999999973
  episodes_this_iter: 3
  episodes_total: 3650
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.400382241537764e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.1725399745007356
          entropy_coeff: 0.009999999999999998
          kl: 0.0017628587244566851
          policy_loss: -0.22034147042367194
          total_loss: 482560131163.0222
          vf_explained_var: -0.40878185629844666
          vf_loss: 482560131163.0222
    num_agent_steps_sampled: 1338000
    num_agent_steps_trained: 1338000
    num_steps_sampled: 1338000
    num_steps_trained: 13380

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1338,42014.4,1338000,-3.0976,-0.22,-9.58,240.19




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1339000
  custom_metrics: {}
  date: 2021-10-24_07-41-13
  done: false
  episode_len_mean: 241.87
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.193399999999989
  episode_reward_min: -10.909999999999975
  episodes_this_iter: 4
  episodes_total: 3654
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.700191120768882e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.834067189693451
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.12891867127683426
          total_loss: 95133626467302.4
          vf_explained_var: 0.0506834015250206
          vf_loss: 63500094902664.53
    num_agent_steps_sampled: 1339000
    num_agent_steps_trained: 1339000
    num_steps_sampled: 1339000
    num_steps_trained: 1339000
  iterations_sinc

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1339,42052,1339000,-3.1934,-0.22,-10.91,241.87




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1340000
  custom_metrics: {}
  date: 2021-10-24_07-41-51
  done: false
  episode_len_mean: 243.55
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.279099999999989
  episode_reward_min: -10.909999999999975
  episodes_this_iter: 4
  episodes_total: 3658
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.050286681153326e-49
          cur_lr: 5.000000000000001e-05
          entropy: 1.1336800012323591
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.01898698343171014
          total_loss: .nan
          vf_explained_var: -1.0
          vf_loss: 1893697391456.7112
    num_agent_steps_sampled: 1340000
    num_agent_steps_trained: 1340000
    num_steps_sampled: 1340000
    num_steps_trained: 1340000
  iterations_since_restore: 1340
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1340,42089.7,1340000,-3.2791,-0.22,-10.91,243.55




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1341000
  custom_metrics: {}
  date: 2021-10-24_07-42-49
  done: false
  episode_len_mean: 243.65
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.2840999999999885
  episode_reward_min: -10.909999999999975
  episodes_this_iter: 4
  episodes_total: 3662
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 6.075430021729986e-49
          cur_lr: 5.000000000000001e-05
          entropy: 1.4591216325759888
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.22464905480543773
          total_loss: 1941518225831.724
          vf_explained_var: 0.2340068221092224
          vf_loss: 1430454218114.8445
    num_agent_steps_sampled: 1341000
    num_agent_steps_trained: 1341000
    num_steps_sampled: 1341000
    num_steps_trained: 1341000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1341,42147.6,1341000,-3.2841,-0.22,-10.91,243.65


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1342000
  custom_metrics: {}
  date: 2021-10-24_07-43-33
  done: false
  episode_len_mean: 243.09
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.2784999999999886
  episode_reward_min: -10.909999999999975
  episodes_this_iter: 4
  episodes_total: 3666
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 9.113145032594981e-49
          cur_lr: 5.000000000000001e-05
          entropy: 1.2829474965731302
          entropy_coeff: 0.009999999999999998
          kl: 1.714110858945383
          policy_loss: 0.023669568200906117
          total_loss: 3790520682632.533
          vf_explained_var: -0.3206571042537689
          vf_loss: 3790520682632.533
    num_agent_steps_sampled: 1342000
    num_agent_steps_trained: 1342000
    num_steps_sampled: 1342000
    num_steps_trained: 1342000

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1342,42191.4,1342000,-3.2785,-0.22,-10.91,243.09




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1343000
  custom_metrics: {}
  date: 2021-10-24_07-44-12
  done: false
  episode_len_mean: 244.58
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.5087999999999893
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 4
  episodes_total: 3670
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.366971754889247e-48
          cur_lr: 5.000000000000001e-05
          entropy: 1.4103735579384697
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.03316236258380943
          total_loss: .nan
          vf_explained_var: -1.0
          vf_loss: 1424898252253.8667
    num_agent_steps_sampled: 1343000
    num_agent_steps_trained: 1343000
    num_steps_sampled: 1343000
    num_steps_trained: 1343000
  iterations_since_restore: 1343
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1343,42231,1343000,-3.5088,-0.22,-13.91,244.58


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1344000
  custom_metrics: {}
  date: 2021-10-24_07-45-02
  done: false
  episode_len_mean: 243.28
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.4561999999999893
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 5
  episodes_total: 3675
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.05045763233387e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.534780106279585
          entropy_coeff: 0.009999999999999998
          kl: 1.0542377312978108
          policy_loss: 0.049364919463793434
          total_loss: 599567306478.9333
          vf_explained_var: -0.3333333432674408
          vf_loss: 599567306478.9333
    num_agent_steps_sampled: 1344000
    num_agent_steps_trained: 1344000
    num_steps_sampled: 1344000
    num_steps_trained: 1344000


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1344,42280.4,1344000,-3.4562,-0.22,-13.91,243.28


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1345000
  custom_metrics: {}
  date: 2021-10-24_07-45-55
  done: false
  episode_len_mean: 239.2
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.4153999999999898
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 5
  episodes_total: 3680
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.075686448500806e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.023690228660901386
          total_loss: 1282952198735.6445
          vf_explained_var: 9.536743306171047e-08
          vf_loss: 1282952198735.6445
    num_agent_steps_sampled: 1345000
    num_agent_steps_trained: 1345000
    num_steps_sampled: 1345000
    num_steps_trained: 1345000
  iterations_since_restor

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1345,42333.5,1345000,-3.4154,-0.22,-13.91,239.2


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1346000
  custom_metrics: {}
  date: 2021-10-24_07-46-43
  done: false
  episode_len_mean: 236.84
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.3917999999999906
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 5
  episodes_total: 3685
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.537843224250403e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.02369019389152527
          total_loss: 5926722389.333333
          vf_explained_var: 5.960464477539063e-08
          vf_loss: 5926722389.333333
    num_agent_steps_sampled: 1346000
    num_agent_steps_trained: 1346000
    num_steps_sampled: 1346000
    num_steps_trained: 1346000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1346,42382,1346000,-3.3918,-0.22,-13.91,236.84




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1347000
  custom_metrics: {}
  date: 2021-10-24_07-47-55
  done: false
  episode_len_mean: 233.33
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.356699999999991
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 5
  episodes_total: 3690
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.689216121252015e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: 0.02633168486257394
          total_loss: 5876033365.333333
          vf_explained_var: 0.0
          vf_loss: 5876033365.333333
    num_agent_steps_sampled: 1347000
    num_agent_steps_trained: 1347000
    num_steps_sampled: 1347000
    num_steps_trained: 1347000
  iterations_since_restore: 1347
  node_ip: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1347,42454,1347000,-3.3567,-0.22,-13.91,233.33


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1348000
  custom_metrics: {}
  date: 2021-10-24_07-48-46
  done: false
  episode_len_mean: 230.62
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.3204999999999916
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 5
  episodes_total: 3695
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.844608060626007e-49
          cur_lr: 5.000000000000001e-05
          entropy: 0.2917155497603946
          entropy_coeff: 0.009999999999999998
          kl: 0.5698996994230482
          policy_loss: -0.11497774720191956
          total_loss: 1476100035828.6223
          vf_explained_var: 0.15381929278373718
          vf_loss: 1476100035828.6223
    num_agent_steps_sampled: 1348000
    num_agent_steps_trained: 1348000
    num_steps_sampled: 1348000
    num_steps_trained: 1348

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1348,42504.3,1348000,-3.3205,-0.22,-13.91,230.62




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1349000
  custom_metrics: {}
  date: 2021-10-24_07-49-28
  done: false
  episode_len_mean: 230.6
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.3202999999999916
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 4
  episodes_total: 3699
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.76691209093901e-49
          cur_lr: 5.000000000000001e-05
          entropy: 1.0289389346208837
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.1218936229745547
          total_loss: 2.0811825384993588e+16
          vf_explained_var: -0.32664576172828674
          vf_loss: 9501626258867632.0
    num_agent_steps_sampled: 1349000
    num_agent_steps_trained: 1349000
    num_steps_sampled: 1349000
    num_steps_trained: 1349000
  iterat

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1349,42546.6,1349000,-3.3203,-0.22,-13.91,230.6




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1350000
  custom_metrics: {}
  date: 2021-10-24_07-50-13
  done: false
  episode_len_mean: 230.22
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.41009999999999
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 4
  episodes_total: 3703
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.650368136408515e-49
          cur_lr: 5.000000000000001e-05
          entropy: 1.0615979532400768
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.10634547687239117
          total_loss: .nan
          vf_explained_var: 0.018601391464471817
          vf_loss: 643421668784.3556
    num_agent_steps_sampled: 1350000
    num_agent_steps_trained: 1350000
    num_steps_sampled: 1350000
    num_steps_trained: 1350000
  iterations_since_restore:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1350,42591.8,1350000,-3.4101,-0.22,-13.91,230.22




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1351000
  custom_metrics: {}
  date: 2021-10-24_07-50-53
  done: false
  episode_len_mean: 231.28
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.56859999999999
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 4
  episodes_total: 3707
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.297555220461277e-48
          cur_lr: 5.000000000000001e-05
          entropy: 1.2223382141855028
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.08395868399076992
          total_loss: 1395701566668.8
          vf_explained_var: -0.44095277786254883
          vf_loss: 2063144552584.5334
    num_agent_steps_sampled: 1351000
    num_agent_steps_trained: 1351000
    num_steps_sampled: 1351000
    num_steps_trained: 1351000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1351,42632.1,1351000,-3.5686,-0.22,-13.91,231.28


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1352000
  custom_metrics: {}
  date: 2021-10-24_07-51-43
  done: false
  episode_len_mean: 229.97
  episode_media: {}
  episode_reward_max: -0.22000000000000147
  episode_reward_mean: -3.4303999999999912
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 5
  episodes_total: 3712
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.9463328306919165e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.5971842037306891
          entropy_coeff: 0.009999999999999998
          kl: 0.8794893158806695
          policy_loss: -0.030628301327427228
          total_loss: 185042981523.9111
          vf_explained_var: 0.17037269473075867
          vf_loss: 185042981523.9111
    num_agent_steps_sampled: 1352000
    num_agent_steps_trained: 1352000
    num_steps_sampled: 1352000
    num_steps_trained: 1352

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1352,42682,1352000,-3.4304,-0.22,-13.91,229.97


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1353000
  custom_metrics: {}
  date: 2021-10-24_07-52-33
  done: false
  episode_len_mean: 228.11
  episode_media: {}
  episode_reward_max: -1.2100000000000015
  episode_reward_mean: -3.3282999999999925
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 5
  episodes_total: 3717
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.919499246037873e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.03195007642110189
          total_loss: 63058812017.77778
          vf_explained_var: -1.748402951307071e-07
          vf_loss: 63058812017.77778
    num_agent_steps_sampled: 1353000
    num_agent_steps_trained: 1353000
    num_steps_sampled: 1353000
    num_steps_trained: 1353000
  iterations_since_restore

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1353,42731.7,1353000,-3.3283,-1.21,-13.91,228.11




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1354000
  custom_metrics: {}
  date: 2021-10-24_07-53-42
  done: false
  episode_len_mean: 227.92
  episode_media: {}
  episode_reward_max: -1.2100000000000015
  episode_reward_mean: -3.2919999999999927
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 5
  episodes_total: 3722
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.4597496230189365e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.9941482967800565
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.11715977084305551
          total_loss: 4942552819331560.0
          vf_explained_var: 0.06302469968795776
          vf_loss: 1.4822153199029436e+16
    num_agent_steps_sampled: 1354000
    num_agent_steps_trained: 1354000
    num_steps_sampled: 1354000
    num_steps_trained: 1354000
  iter

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1354,42800.5,1354000,-3.292,-1.21,-13.91,227.92




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1355000
  custom_metrics: {}
  date: 2021-10-24_07-54-26
  done: false
  episode_len_mean: 228.52
  episode_media: {}
  episode_reward_max: -1.2100000000000015
  episode_reward_mean: -3.3783999999999916
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 4
  episodes_total: 3726
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.189624434528405e-48
          cur_lr: 5.000000000000001e-05
          entropy: 1.1156533585654365
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.07500408275259865
          total_loss: 8641302302993.066
          vf_explained_var: -0.42431768774986267
          vf_loss: 4021640030890.6665
    num_agent_steps_sampled: 1355000
    num_agent_steps_trained: 1355000
    num_steps_sampled: 1355000
    num_steps_trained: 1355000
  iteration

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1355,42844.4,1355000,-3.3784,-1.21,-13.91,228.52




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1356000
  custom_metrics: {}
  date: 2021-10-24_07-55-16
  done: false
  episode_len_mean: 227.07
  episode_media: {}
  episode_reward_max: -1.329999999999988
  episode_reward_mean: -3.337399999999992
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 5
  episodes_total: 3731
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.2844366517926076e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.6653499490684933
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.1097643600569831
          total_loss: 244351926272.0
          vf_explained_var: -0.04063703119754791
          vf_loss: 161829978931.2
    num_agent_steps_sampled: 1356000
    num_agent_steps_trained: 1356000
    num_steps_sampled: 1356000
    num_steps_trained: 1356000
  iterations_since_r

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1356,42894.2,1356000,-3.3374,-1.33,-13.91,227.07




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1357000
  custom_metrics: {}
  date: 2021-10-24_07-56-01
  done: false
  episode_len_mean: 227.48
  episode_media: {}
  episode_reward_max: -1.329999999999988
  episode_reward_mean: -3.2914999999999925
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 4
  episodes_total: 3735
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 4.92665497768891e-48
          cur_lr: 5.000000000000001e-05
          entropy: 1.2012583136558532
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.24420291570325692
          total_loss: 21844033798144.0
          vf_explained_var: -0.02366313710808754
          vf_loss: 34238639633567.29
    num_agent_steps_sampled: 1357000
    num_agent_steps_trained: 1357000
    num_steps_sampled: 1357000
    num_steps_trained: 1357000
  iterations_sin

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1357,42939.6,1357000,-3.2915,-1.33,-13.91,227.48




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1358000
  custom_metrics: {}
  date: 2021-10-24_07-56-44
  done: false
  episode_len_mean: 227.66
  episode_media: {}
  episode_reward_max: -1.329999999999988
  episode_reward_mean: -3.2536999999999923
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 4
  episodes_total: 3739
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 7.389982466533367e-48
          cur_lr: 5.000000000000001e-05
          entropy: 0.8574604156944486
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.20351172470384174
          total_loss: 3285214376755.2
          vf_explained_var: -0.21475465595722198
          vf_loss: 3196107317430.0444
    num_agent_steps_sampled: 1358000
    num_agent_steps_trained: 1358000
    num_steps_sampled: 1358000
    num_steps_trained: 1358000
  iterations_s

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1358,42982.5,1358000,-3.2537,-1.33,-13.91,227.66


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1359000
  custom_metrics: {}
  date: 2021-10-24_07-57-36
  done: false
  episode_len_mean: 226.27
  episode_media: {}
  episode_reward_max: -1.329999999999988
  episode_reward_mean: -3.1211999999999933
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 5
  episodes_total: 3744
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.108497369980005e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.8938672456476423
          entropy_coeff: 0.009999999999999998
          kl: 1.4338390722870826
          policy_loss: -0.002081800086630715
          total_loss: 3919542616064.0
          vf_explained_var: -0.8708451390266418
          vf_loss: 3919542616064.0
    num_agent_steps_sampled: 1359000
    num_agent_steps_trained: 1359000
    num_steps_sampled: 1359000
    num_steps_trained: 1359000
  i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1359,43035,1359000,-3.1212,-1.33,-13.91,226.27




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1360000
  custom_metrics: {}
  date: 2021-10-24_07-58-18
  done: false
  episode_len_mean: 225.53
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -3.0794999999999932
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 4
  episodes_total: 3748
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 1.6627460549700075e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.8115842791895072
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: -0.050100843442810905
          total_loss: 537070056243.2
          vf_explained_var: -0.07450584322214127
          vf_loss: 3547514959007.289
    num_agent_steps_sampled: 1360000
    num_agent_steps_trained: 1360000
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1360,43076.3,1360000,-3.0795,-1.73,-13.91,225.53




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1361000
  custom_metrics: {}
  date: 2021-10-24_07-59-11
  done: false
  episode_len_mean: 226.2
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -3.1548999999999925
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 4
  episodes_total: 3752
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 2.4941190824550106e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.999446091387007
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.006039063798056708
          total_loss: 3033472600200.533
          vf_explained_var: -0.13298332691192627
          vf_loss: 1478851168574.578
    num_agent_steps_sampled: 1361000
    num_agent_steps_trained: 1361000
    num_steps_sampled: 1361000
    num_steps_trained: 1361000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1361,43129.8,1361000,-3.1549,-1.73,-13.91,226.2




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1362000
  custom_metrics: {}
  date: 2021-10-24_08-00-00
  done: false
  episode_len_mean: 225.62
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -3.1685999999999934
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 4
  episodes_total: 3756
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 3.741178623682516e-47
          cur_lr: 5.000000000000001e-05
          entropy: 1.0357656598091125
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.004608476327525245
          total_loss: 7328295533499.733
          vf_explained_var: -0.5432868003845215
          vf_loss: 3908244711378.489
    num_agent_steps_sampled: 1362000
    num_agent_steps_trained: 1362000
    num_steps_sampled: 1362000
    num_steps_trained: 1362000
  iterations_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1362,43178.3,1362000,-3.1686,-1.73,-13.91,225.62




Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1363000
  custom_metrics: {}
  date: 2021-10-24_08-00-40
  done: false
  episode_len_mean: 225.27
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -3.302999999999993
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 4
  episodes_total: 3760
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 5.611767935523775e-47
          cur_lr: 5.000000000000001e-05
          entropy: 1.0106319983800252
          entropy_coeff: 0.009999999999999998
          kl: .inf
          policy_loss: 0.1787847739126947
          total_loss: 1347655783765.3333
          vf_explained_var: 0.10106462985277176
          vf_loss: 799030760425.2444
    num_agent_steps_sampled: 1363000
    num_agent_steps_trained: 1363000
    num_steps_sampled: 1363000
    num_steps_trained: 1363000
  iterations_si

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1363,43218.5,1363000,-3.303,-1.73,-13.91,225.27


Result for PPO_my_env_bff91_00000:
  agent_timesteps_total: 1364000
  custom_metrics: {}
  date: 2021-10-24_08-01-29
  done: false
  episode_len_mean: 223.62
  episode_media: {}
  episode_reward_max: -1.7300000000000013
  episode_reward_mean: -3.295899999999993
  episode_reward_min: -13.909999999999968
  episodes_this_iter: 5
  episodes_total: 3765
  experiment_id: 308d04df514f4288854eb163945dc05a
  hostname: 0b59607b37d7
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 8.417651903285661e-47
          cur_lr: 5.000000000000001e-05
          entropy: 0.0
          entropy_coeff: 0.009999999999999998
          kl: 0.0
          policy_loss: -0.06497392306725185
          total_loss: 30290727424.0
          vf_explained_var: 0.14537595212459564
          vf_loss: 30290727424.0
    num_agent_steps_sampled: 1364000
    num_agent_steps_trained: 1364000
    num_steps_sampled: 1364000
    num_steps_trained: 1364000
  iterations_since_restore: 1364
  nod

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_my_env_bff91_00000,RUNNING,172.17.0.2:57533,1364,43267.6,1364000,-3.2959,-1.73,-13.91,223.62


Process _WandbLoggingProcess-1:
Traceback (most recent call last):
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/root/miniconda/envs/py37/lib/python3.7/site-packages/ray/tune/integration/wandb.py", line 200, in run
    result = self.queue.get()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/queues.py", line 94, in get
    res = self._recv_bytes()
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/root/miniconda/envs/py37/lib/python3.7/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/root/miniconda/en

KeyboardInterrupt: 